├── .gitignore ├── README.md ├── Old ├── main.jl ├── MurphykModule.jl ├── sklearn-demo.jl ├── turing-demo.jl ├── gauss1d-gibbs.jl ├── second-order.jl ├── tmp.jl ├── utils.jl ├── kalman_tracking_demo.jl ├── zigzag.jl ├── flux-demo.jl ├── logreg-opt-demo.jl ├── logreg-binary-opt-demo.jl ├── logreg-binary-opt-old.jl ├── rosenbrock-opt-demo.jl ├── dummy.jl ├── kalman_qe.jl ├── first-order.jl ├── code_breaker.jl ├── indexing.jl └── support_code.jl ├── zygote_demo.jl ├── julia-1-4-colab.ipynb ├── julia-install.txt ├── FactorGraphs ├── gauss-bayes-rule-multi.jl ├── gauss-bayes-rule.jl ├── compose_nodes.jl ├── kalman_smoother.jl ├── kalman_smoother_1d.jl ├── vb_unigauss.jl ├── ep_discrete_kf.jl ├── tmp.jl ├── kalman_filter.jl ├── kalman_filter_1d.jl ├── lin_probit_regression_corrupted_classlabels.jl ├── vb_gmm.jl ├── gauss-bayes-rule-2d.jl ├── structured_mean_field.jl ├── hmm.jl └── kalman_smoother_2d.jl └── notes.txt /.gitignore: -------------------------------------------------------------------------------- 1 | *~ -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # jpml 2 | Julia code for Probabilistic Machine Learning 3 | -------------------------------------------------------------------------------- /Old/main.jl: -------------------------------------------------------------------------------- 1 | include("utils.jl") 2 | using MurphykModule 3 | 4 | println(square(2)) 5 | println(normalize_probdist([1,2,3])) 6 | -------------------------------------------------------------------------------- /Old/MurphykModule.jl: -------------------------------------------------------------------------------- 1 | module MurphykModule 2 | 3 | export square 4 | 5 | square(x) = x^2 6 | 7 | cube(x) = x^3 8 | 9 | end 10 | -------------------------------------------------------------------------------- /zygote_demo.jl: -------------------------------------------------------------------------------- 1 | 2 | 3 | function f(x,w,n) 4 | a = 0.0 5 | for i=1:n 6 | a += w'*x; 7 | end 8 | return a; 9 | end 10 | f2(x,w,n) = n*w'*x; 11 | 12 | D = 3; 13 | w = rand(D); x = rand(D); 14 | @assert isapprox(f(x,w,n), f2(x,w,n)) 15 | 16 | # df/dx = n*w, df/dw = n*x 17 | g = gradient(f,x,w,n)[2] # gradient returns (df/dx, df/dw) 18 | g2 = n*x 19 | @assert isapprox(g, g2) 20 | 21 | println("time :") 22 | n = 10000 23 | @time gradient(f, x, w, n) 24 | 25 | println("done") 26 | -------------------------------------------------------------------------------- /julia-1-4-colab.ipynb: -------------------------------------------------------------------------------- 1 | 2 | { 3 | "nbformat": 4, 4 | "nbformat_minor": 0, 5 | "metadata": { 6 | "colab": { 7 | "name": "Julia on Colab.ipynb", 8 | "version": "0.3.2", 9 | "provenance": [] 10 | }, 11 | "kernelspec": { 12 | "name": "julia-1.4", 13 | "display_name": "Julia 1.4" 14 | }, 15 | "accelerator": "GPU" 16 | }, 17 | "cells": [ 18 | { 19 | "metadata": { 20 | "id": "oMSuTc3pDlHv", 21 | "colab_type": "code", 22 | "colab": {} 23 | }, 24 | "cell_type": "code", 25 | "source": [ 26 | "" 27 | ], 28 | "execution_count": 0, 29 | "outputs": [] 30 | } 31 | ] 32 | } -------------------------------------------------------------------------------- /julia-install.txt: -------------------------------------------------------------------------------- 1 | #https://github.com/Dsantra92/Julia-on-Colab/blob/master/julia_on_collab.ipynb 2 | 
#https://stackoverflow.com/questions/58270424/julia-in-google-colab

!curl -sSL "https://julialang-s3.julialang.org/bin/linux/x64/1.4/julia-1.4.0-linux-x86_64.tar.gz" -o julia.tar.gz
!tar -xzf julia.tar.gz -C /usr --strip-components 1
!rm -rf julia.tar.gz*
!julia -e 'using Pkg; pkg"add IJulia; precompile"'


using Pkg
pkg"add BenchmarkTools; precompile;"
pkg"add CuArrays; precompile;"

using BenchmarkTools
mcpu = rand(2^10, 2^10)
@benchmark mcpu*mcpu


using CuArrays
mgpu = cu(mcpu)
@benchmark CuArrays.@sync mgpu*mgpu


!julia -e 'using Pkg; pkg"add IJulia; add CuArrays; add Flux; precompile"'

--------------------------------------------------------------------------------
/FactorGraphs/gauss-bayes-rule-multi.jl:
--------------------------------------------------------------------------------
# Bayes rule for a linear Gaussian model
# X -> Y, observe Y, infer X
# Y ~ N(A x + b, R)
# X ~ N(0, Q)

#https://github.com/biaslab/ForneyLab.jl/issues/17#issuecomment-460290360

using ForneyLab, LinearAlgebra

function make_model()
    g = FactorGraph()

    nhidden = 4
    nobs = 2
    A = [1 0 0 0; 0 1 0 0]
    b = zeros(Float64, nobs)
    Q = eye(nhidden)
    R = 0.1*eye(nobs)
    y_data = randn(nobs)

    @RV x ~ GaussianMeanVariance(zeros(nhidden), Q)
    @RV obs_noise ~ GaussianMeanVariance(zeros(nobs), R)
    @RV y = A*x + b + obs_noise
    placeholder(y, :y, dims=(nobs,)) # add clamping

    # Return the hidden variable and the data so they are visible outside the function
    return x, y_data
end

x, y_data = make_model()
algo = Meta.parse(sumProductAlgorithm(x))
eval(algo) # Load algorithm
data = Dict(:y => y_data)
marginals = step!(data);
--------------------------------------------------------------------------------
/Old/sklearn-demo.jl:
--------------------------------------------------------------------------------
#https://scikitlearnjl.readthedocs.io/en/latest/quickstart/

using ScikitLearn
using RDatasets: dataset

iris = dataset("datasets", "iris")

# ScikitLearn.jl expects arrays, but DataFrames can also be used - see
# the corresponding section of the manual
X = convert(Array, iris[[:SepalLength, :SepalWidth, :PetalLength, :PetalWidth]])
y = convert(Array, iris[:Species])

@sk_import linear_model: LogisticRegression
model = LogisticRegression(fit_intercept=true)
fit!(model, X, y)

accuracy = sum(predict(model, X) .== y) / length(y)
println("accuracy: $accuracy")

using ScikitLearn.CrossValidation: cross_val_score

# https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/model_selection/_validation.py
cross_val_score(LogisticRegression(), X, y; cv=5)  # 5-fold
--------------------------------------------------------------------------------
/Old/turing-demo.jl:
--------------------------------------------------------------------------------
using Turing
using StatsPlots

# Define a simple Normal model with unknown mean and variance.
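# Generative model, written out for reference (matches the @model block below):
#   s ~ InverseGamma(2, 3)          # unknown variance
#   m ~ Normal(0, sqrt(s))          # unknown mean
#   x ~ Normal(m, sqrt(s)),  y ~ Normal(m, sqrt(s))   # two scalar observations
# Conditioning on x = 1.5 and y = 2 below gives a joint posterior over (s, m).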
@model gdemo(x, y) = begin
    s ~ InverseGamma(2,3)
    m ~ Normal(0,sqrt(s))
    x ~ Normal(m, sqrt(s))
    y ~ Normal(m, sqrt(s))
end

# Run sampler, collect results
niters = 10
nparticles = 5
x = 1.5; y = 2;
@info "Starting SMC"
c1 = sample(gdemo(x, y), SMC(niters))
@info "Starting PG"
c2 = sample(gdemo(x, y), PG(nparticles, niters))
@info "Starting HMC"
c3 = sample(gdemo(x, y), HMC(niters, 0.1, 5))
@info "Starting Gibbs"
c4 = sample(gdemo(x, y), Gibbs(niters, PG(10, 2, :m), HMC(2, 0.1, 5, :s)))
@info "Starting HMCDA"
c5 = sample(gdemo(x, y), HMCDA(niters, 0.15, 0.65))
@info "Starting NUTS"
c6 = sample(gdemo(x, y), NUTS(niters, 0.65))
@info "Done"
# Summarise results (currently requires the master branch from MCMCChain)
describe(c3)

# Plot and save results
p = plot(c3)
savefig("gdemo-plot.png")
--------------------------------------------------------------------------------
/Old/gauss1d-gibbs.jl:
--------------------------------------------------------------------------------
#https://discourse.julialang.org/t/learning-bayesian-data-analysis-using-julia/5370/14

# Gibbs sampling for a 1d Gaussian with unknown mean and precision:
# mu ~ N(mu0, 1/tau0)
# tau = 1/sigma^2 ~ Ga(alpha, beta)
# xn ~ N(mu, sigma)

using Distributions, Statistics, Printf

function mc(x, iterations, μ0 = 0., τ0 = 1e-7, α = 0.0001, β = 0.0001)
    n = length(x)
    sumx = sum(x)
    μ, τ = sumx/n, 1/var(x)
    μs, τs = Float64[], Float64[]
    for i in 1:iterations
        μ = rand(Normal((τ0*μ0 + τ*sumx)/(τ0 + n*τ), 1/sqrt(τ0 + n*τ)))
        τ = rand(Gamma(α + n/2, 1/(β + 0.5*sum((xᵢ-μ)^2 for xᵢ in x))))
        push!(μs, μ)
        push!(τs, τ)
    end
    μs, τs
end

n = 100
mu = 0.2
sigma = 1.7^(-0.5)
x = rand(Normal(mu, sigma), n)
nsamples = 1000
μs, τs = mc(x, nsamples)
println("mean μ = ", mean(μs), " ± ", std(μs), ", truth = ", mu)
@printf("mean μ = %5.3f ± %5.3f, truth = %5.3f\n", mean(μs), std(μs), mu)
println("precision τ = ", mean(τs), " ± ", std(τs), ", truth = ", 1/sigma^2)
--------------------------------------------------------------------------------
/FactorGraphs/gauss-bayes-rule.jl:
--------------------------------------------------------------------------------
# Bayes rule for a linear Gaussian model
# X -> Y, observe Y, infer X
# Y ~ N(a x + b, R)
# X ~ N(0, Q)

using ForneyLab, LinearAlgebra


###################### Scalar example
g = FactorGraph()
A = 0.2
b = 0.0
Q = 1
R = 0.
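# Note: R = 0. makes the observation noiseless, so the posterior on x collapses onto
# the single point x = (y_data - b)/A = 6.0; use a strictly positive R to see a
# genuine compromise between the prior and the likelihood.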
15 | y_data = 1.2 16 | 17 | @RV x ~ GaussianMeanVariance(0.0, Q) 18 | @RV n_t ~ GaussianMeanVariance(0.0, R) 19 | @RV y = A*x + b + n_t 20 | placeholder(y, :y) 21 | 22 | println("generating inference code") 23 | algo = Meta.parse(sumProductAlgorithm(x)) 24 | println("Compiling") 25 | eval(algo) # Load algorithm 26 | 27 | data = Dict(:y => y_data) 28 | 29 | println("running inference") 30 | marginals = step!(data); 31 | 32 | #################### 33 | # Multivariate example 34 | #https://github.com/biaslab/ForneyLab.jl/issues/17 35 | 36 | 37 | g = FactorGraph() 38 | 39 | nhidden = 4 40 | nobs = 2 41 | A = [1 0 0 0; 42 | 0 1 0 0] 43 | b = zeros(Float64, nobs) 44 | Q = eye(nhidden) 45 | R = 0.1*eye(nobs) 46 | y_data = randn(nobs) 47 | 48 | @RV x ~ GaussianMeanVariance(zeros(nhidden), Q) 49 | @RV obs_noise ~ GaussianMeanVariance(zeros(nobs), R) 50 | @RV y = A*x + b + obs_noise 51 | placeholder(y, :y, dims=(nobs,)) # add clamping 52 | 53 | algo = Meta.parse(sumProductAlgorithm(x)) 54 | eval(algo) # Load algorithm 55 | data = Dict(:y => y_data) 56 | marginals = step!(data); 57 | post = marginals[:x] 58 | -------------------------------------------------------------------------------- /FactorGraphs/compose_nodes.jl: -------------------------------------------------------------------------------- 1 | 2 | # Composite nodes 3 | #https://github.com/biaslab/ForneyLab.jl/blob/master/demo/6_composite_nodes.ipynb 4 | 5 | using ForneyLab 6 | 7 | # Define factor graph for x1 = x0 + b*u1, where x0 and u1 have Gaussian priors, and b is a constant. 8 | # This is a part of the information filter graph from the introduction. 9 | g = FactorGraph() 10 | 11 | b = [1.0; 0.5] 12 | 13 | @RV x_0 ~ GaussianMeanVariance(ones(2), eye(2)) 14 | @RV u_1 ~ GaussianMeanVariance(1.0, 1.0) 15 | @RV x_1 = x_0 + b*u_1; 16 | 17 | flat_schedule = sumProductSchedule(x_1) 18 | 19 | ForneyLab.draw(g, schedule=flat_schedule) # Inspect the resulting schedule 20 | 21 | println(flat_schedule) 22 | 23 | # Define a composite node for z = x + b*y 24 | @composite GainAddition (z, x, y) begin 25 | # Specify the 'internal factor graph' of the GainAddion composite node. 26 | # z, x, and y can be used as if they are existing Variables in this block. 
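    # (The point of the composite is to hide the internal subgraph z = x + b*y behind a
    # single GainAddition factor, so that later graphs can use it as one node and,
    # further below, register a shortcut sum-product rule for it instead of scheduling
    # the internal additions and gains separately.)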
27 | b = [1.0; 0.5] 28 | 29 | @RV z = x + b*y 30 | end 31 | 32 | g2 = FactorGraph() 33 | 34 | @RV x_0 ~ GaussianMeanVariance(ones(2), eye(2)) 35 | @RV u_1 ~ GaussianMeanVariance(1.0, 1.0) 36 | @RV x_1 ~ GainAddition(x_0, u_1); 37 | 38 | composite_schedule = sumProductSchedule(x_1) 39 | 40 | ForneyLab.draw(g2, schedule=composite_schedule) 41 | 42 | println(composite_schedule) 43 | 44 | println(composite_schedule[end].internal_schedule) 45 | 46 | @sumProductRule(:node_type => GainAddition, # our custom composite node 47 | :outbound_type => Message{GaussianMeanPrecision}, # this rule produces a GaussianMeanPrecision msg 48 | :inbound_types => (Nothing, Message{Gaussian}, Message{Gaussian}), # msg towards first interface, incoming types 49 | :name => SPGainAdditionOutVGG) # name of the update rule; 50 | 51 | shortcut_schedule = sumProductSchedule(x_1) 52 | println(shortcut_schedule) 53 | -------------------------------------------------------------------------------- /FactorGraphs/kalman_smoother.jl: -------------------------------------------------------------------------------- 1 | #https://github.com/biaslab/ForneyLab.jl/blob/master/demo/2_state_estimation_forward_backward.ipynb 2 | # Forward backward smoothing 3 | 4 | # Data 5 | Random.seed!(1) 6 | n_samples = 20 7 | x_data = [t for t=1:n_samples] # State 8 | y_data = x_data + sqrt(200.0)*randn(n_samples); # Noisy observations of state 9 | 10 | using ForneyLab 11 | 12 | g = FactorGraph() 13 | 14 | # Prior statistics 15 | m_x_0 = placeholder(:m_x_0) 16 | v_x_0 = placeholder(:v_x_0) 17 | 18 | # State prior 19 | @RV x_0 ~ GaussianMeanVariance(m_x_0, v_x_0) 20 | 21 | # Transition and observation model 22 | x = Vector{Variable}(undef, n_samples) 23 | y = Vector{Variable}(undef, n_samples) 24 | 25 | x_t = x_0 26 | for t = 1:n_samples 27 | global x_t 28 | @RV n_t ~ GaussianMeanVariance(0.0, 200.0) # observation noise 29 | @RV x[t] = x_t + 1.0 30 | # Name variable for ease of lookup 31 | sym = Symbol("x_", t) 32 | x[t].id = sym #:x_t; 33 | println(sym) 34 | @RV y[t] = x[t] + n_t 35 | 36 | # Data placeholder 37 | placeholder(y[t], :y, index=t) 38 | 39 | # Reset state for next step 40 | x_t = x[t] 41 | end 42 | 43 | println("generating inference code") 44 | algo = Meta.parse(sumProductAlgorithm(x)) 45 | println("Compiling") 46 | eval(algo) # Load algorithm 47 | 48 | # Prepare data dictionary and prior statistics 49 | data = Dict(:y => y_data, 50 | :m_x_0 => 0.0, 51 | :v_x_0 => 1000.0) 52 | 53 | # Execute algorithm 54 | println("running forwards backwads") 55 | marginals = step!(data); 56 | # Extract posterior statistics 57 | m_x = [mean(marginals[:x_*t]) for t = 1:n_samples] 58 | v_x = [var(marginals[:x_*t]) for t = 1:n_samples] 59 | 60 | using Plots; pyplot() 61 | 62 | xs = 1:n_samples 63 | scatter(xs, y_data, color=[:blue], label="y") 64 | plot!(xs, x_data, color=[:black], label="x") 65 | plot!(xs, m_x, color=[:red], label="estimated x") 66 | #grid("on") 67 | ys = m_x; l = sqrt.(v_x); u = sqrt.(v_x); 68 | #fill_between(xs, y.-l, y.+_u, color="b", alpha=0.3) 69 | plot!([ys ys], fillrange=[ys.-l ys.+u], fillalpha=0.3, c=:red) 70 | xlabel!("t") 71 | fname = "Figures/forney-kalman-smoother.png" 72 | savefig(fname) 73 | -------------------------------------------------------------------------------- /FactorGraphs/kalman_smoother_1d.jl: -------------------------------------------------------------------------------- 1 | #https://github.com/biaslab/ForneyLab.jl/blob/master/demo/2_state_estimation_forward_backward.ipynb 2 | # Forward backward smoothing 3 
| 4 | # Data 5 | Random.seed!(1) 6 | n_samples = 10 7 | x_data = [t for t=1:n_samples] # State 8 | y_data = x_data + sqrt(200.0)*randn(n_samples); # Noisy observations of state 9 | 10 | using ForneyLab 11 | 12 | g = FactorGraph() 13 | 14 | # Prior statistics 15 | #m_x_0 = placeholder(:m_x_0) 16 | #v_x_0 = placeholder(:v_x_0) 17 | #@RV x_0 ~ GaussianMeanVariance(m_x_0, v_x_0) 18 | @RV x_0 ~ GaussianMeanVariance(0, 1000) 19 | 20 | # Transition and observation model 21 | x = Vector{Variable}(undef, n_samples) 22 | y = Vector{Variable}(undef, n_samples) 23 | 24 | x_prev = x_0 25 | for t = 1:n_samples 26 | global x_prev 27 | @RV n_t ~ GaussianMeanVariance(0.0, 200.0) # observation noise 28 | @RV x[t] = x_prev + 1.0 29 | # Name variable for ease of lookup 30 | sym = Symbol("x_", t) 31 | x[t].id = sym #:x_t; 32 | @RV y[t] = x[t] + n_t 33 | 34 | # Data placeholder 35 | placeholder(y[t], :y, index=t) 36 | 37 | # Reset state for next step 38 | x_prev = x[t] 39 | end 40 | 41 | println("generating inference code") 42 | algo = Meta.parse(sumProductAlgorithm(x)) 43 | println("Compiling") 44 | eval(algo) # Load algorithm 45 | 46 | # Prepare data dictionary and prior statistics 47 | data = Dict(:y => y_data) 48 | 49 | #= 50 | data = Dict(:y => y_data, 51 | :m_x_0 => 0.0, 52 | :v_x_0 => 1000.0) 53 | =# 54 | 55 | # Execute algorithm 56 | println("running forwards backwads") 57 | marginals = step!(data); 58 | # Extract posterior statistics 59 | m_x = [mean(marginals[:x_*t]) for t = 1:n_samples] 60 | v_x = [var(marginals[:x_*t]) for t = 1:n_samples] 61 | 62 | 63 | 64 | import Plots; 65 | # pyplot() 66 | xs = 1:n_samples 67 | Plots.scatter(xs, y_data, color=[:blue], label="y") 68 | Plots.plot!(xs, x_data, color=[:black], label="x") 69 | Plots.plot!(xs, m_x, color=[:red], label="estimated x") 70 | #grid("on") 71 | ys = m_x; l = sqrt.(v_x); u = sqrt.(v_x); 72 | #fill_between(xs, y.-l, y.+_u, color="b", alpha=0.3) 73 | Plots.plot!([ys ys], fillrange=[ys.-l ys.+u], fillalpha=0.3, c=:red) 74 | Plots.xlabel!("t") 75 | gcf() 76 | -------------------------------------------------------------------------------- /FactorGraphs/vb_unigauss.jl: -------------------------------------------------------------------------------- 1 | #https://github.com/biaslab/ForneyLab.jl/blob/master/demo/3_variational_estimation_iid_gaussian.ipynb 2 | # Variational Bayes for a 1d Gaussian 3 | 4 | # Generate toy data set 5 | Random.seed!(1) 6 | n = 5 7 | m_data = 3.0 8 | w_data = 4.0 9 | y_data = sqrt(1/w_data)*randn(n) .+ m_data; 10 | 11 | using(ForneyLab) 12 | 13 | g = FactorGraph() 14 | 15 | # Priors 16 | @RV m ~ GaussianMeanVariance(0.0, 100.0) 17 | @RV w ~ Gamma(0.01, 0.01) 18 | 19 | # Observarion model 20 | y = Vector{Variable}(undef, n) 21 | for i = 1:n 22 | @RV y[i] ~ GaussianMeanPrecision(m, w) 23 | placeholder(y[i], :y, index=i) 24 | end 25 | 26 | # Specify recognition factorization 27 | q = RecognitionFactorization(m, w, ids=[:M, :W]) 28 | 29 | # Inspect the subgraph for m 30 | #ForneyLab.draw(q.recognition_factors[:M]) 31 | 32 | # Generate the variational update algorithms for each recognition factor 33 | algo = variationalAlgorithm(q) 34 | println(algo) 35 | 36 | algo_F = freeEnergyAlgorithm(q) 37 | println(algo_F) 38 | 39 | # Load algorithms 40 | eval(Meta.parse(algo)) 41 | eval(Meta.parse(algo_F)); 42 | 43 | data = Dict(:y => y_data) 44 | 45 | # Initial recognition distributions 46 | marginals = Dict(:m => vague(GaussianMeanVariance), 47 | :w => vague(Gamma)) 48 | 49 | n_its = 2*n 50 | F = Vector{Float64}(undef, n_its) # 
Initialize vector for storing Free energy 51 | m_est = Vector{Float64}(undef, n_its) 52 | w_est = Vector{Float64}(undef, n_its) 53 | for i = 1:n_its 54 | stepM!(data, marginals) 55 | stepW!(data, marginals) 56 | 57 | # Store free energy 58 | F[i] = freeEnergy(data, marginals) 59 | 60 | # Store intermediate estimates 61 | m_est[i] = mean(marginals[:m]) 62 | w_est[i] = mean(marginals[:w]) 63 | end 64 | 65 | 66 | 67 | using Plots; pyplot() 68 | 69 | # Plot free energy to check for convergence 70 | plot(1:n_its, F, c=:black, lab="Free energy") 71 | xlabel!("Iteration") 72 | ylabel!("F") 73 | 74 | # Plot estimated mean 75 | ys = m_est; l = 1.0 ./ sqrt.(w_est); u = 1.0 ./ sqrt.(w_est); 76 | plot([ys ys], fillrange=[ys.-l ys.+u], fillalpha=0.3, c=:red, lab="μ") 77 | ylabel!("Estimate") 78 | m_mle = mean(y_data) 79 | m_emp = fill(m_mle, n_its) 80 | plot!(1:n_its, m_emp, c=:blue, lab="\overline{μ}") 81 | -------------------------------------------------------------------------------- /notes.txt: -------------------------------------------------------------------------------- 1 | yc1 = full(sparse(y,1:100,1f0)) 2 | 3 | # Weight initialization for multiple layers: h=array of layer sizes 4 | # Output is an array [w0,b0,w1,b1,...,wn,bn] where wi,bi is the weight matrix and bias vector for the i'th layer 5 | function winit(h...) # use winit(x,h1,h2,...,hn,y) for n hidden layer model 6 | w = Any[] 7 | for i=2:length(h) 8 | push!(w, xavier(h[i],h[i-1])) 9 | push!(w, zeros(h[i],1)) 10 | end 11 | map(Atype, w) 12 | end; 13 | 14 | function convnet(w,x; pdrop=(0,0,0)) # pdrop[1]:input, pdrop[2]:conv, pdrop[3]:fc 15 | for i=1:2:length(w) 16 | if ndims(w[i]) == 4 # convolutional layer 17 | x = dropout(x, pdrop[i==1?1:2]) 18 | x = conv4(w[i],x) .+ w[i+1] 19 | x = pool(relu.(x)) 20 | elseif ndims(w[i]) == 2 # fully connected layer 21 | x = dropout(x, pdrop[i==1?1:3]) 22 | x = w[i]*mat(x) .+ w[i+1] 23 | if i < length(w)-1; x = relu.(x); end 24 | else 25 | error("Unknown layer type: $(size(w[i]))") 26 | end 27 | end 28 | return x 29 | end; 30 | 31 | # Read the imdb dictionary and print the words 32 | imdbvocab = Array{String}(length(imdbdict)) 33 | for (k,v) in imdbdict; imdbvocab[v]=k; end 34 | map(a->imdbvocab[a], xtrn) 35 | 36 | function onehotrows(idx, embeddings) 37 | nrows,ncols = length(idx), size(embeddings,1) 38 | z = zeros(Float32,nrows,ncols) 39 | @inbounds for i=1:nrows 40 | z[i,idx[i]] = 1 41 | end 42 | oftype(AutoGrad.getval(embeddings),z) 43 | end 44 | 45 | #"/home/kpmurphy/.julia/conda/3/bin" 46 | # push!(LOAD_PATH, 47 | 48 | 49 | https://github.com/TuringLang/TuringTutorials/blob/master/3_BayesNN.ipynb 50 | 51 | https://discourse.julialang.org/t/learning-bayesian-data-analysis-using-julia/5370/23 52 | 53 | https://discourse.julialang.org/t/psa-replacement-of-ind2sub-sub2ind-in-julia-0-7/14666 54 | 55 | RSA key with id efb66595564c5646f8fae19c4d619d60bc6b850d not found 56 | 57 | soss.jl: https://cscherrer.github.io/ 58 | mxnet J1.0: https://github.com/apache/incubator-mxnet/issues/13836 59 | 60 | https://julialang.org/learning/ 61 | http://ucidatascienceinitiative.github.io/IntroToJulia/ 62 | 63 | using NBInclude 64 | @nbinclude("myfile.ipynb") 65 | -------------------------------------------------------------------------------- /FactorGraphs/ep_discrete_kf.jl: -------------------------------------------------------------------------------- 1 | 2 | # Expectation propogation for a 1D linear Gaussian SSM with binary observations 3 | 4 | 
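# Generative model used below: x_t = x_{t-1} + u + N(0, 0.01) with drift u = 0.1,
# and binary observations y_t ~ Bernoulli(σ(x_t)). EP is used because the sigmoid
# observation factor is not conjugate to the Gaussian state messages.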
#https://github.com/biaslab/ForneyLab.jl/blob/master/demo/5_expectation_propagation.ipynb 5 | 6 | 7 | using SpecialFunctions 8 | using Random 9 | # Generate data set 10 | 11 | Random.seed!(123) 12 | n_samples = 40 13 | σ(x) = 0.5 + 0.5*erf.(x./sqrt(2)) 14 | 15 | u_data = 0.1 16 | x_data = [] 17 | y_data = [] 18 | x_prev = -2.0 19 | for t=1:n_samples 20 | global x_prev 21 | push!(x_data, x_prev + u_data + sqrt(0.01)*randn()) # State transition 22 | push!(y_data, σ(x_data[end]) > rand()); # Observation 23 | x_prev = x_data[end] 24 | end 25 | 26 | using ForneyLab 27 | 28 | g = FactorGraph() 29 | 30 | # State prior 31 | @RV x_0 ~ GaussianMeanVariance(0.0, 100.0) 32 | 33 | x = Vector{Variable}(undef, n_samples) 34 | d = Vector{Variable}(undef, n_samples) 35 | y = Vector{Variable}(undef, n_samples) 36 | x_t_min = x_0 37 | for t = 1:n_samples 38 | global x_t_min 39 | @RV d[t] ~ GaussianMeanVariance(u_data, 0.01) 40 | @RV x[t] = x_t_min + d[t] 41 | @RV y[t] ~ Sigmoid(x[t]) 42 | 43 | # Data placeholder 44 | placeholder(y[t], :y, index=t) 45 | 46 | # Reset state for next step 47 | x_t_min = x[t] 48 | end 49 | 50 | println("generating code") 51 | algo = expectationPropagationAlgorithm(x); 52 | println(algo) # Uncomment to inspect algorithm code 53 | 54 | println("compiling code") 55 | eval(Meta.parse(algo)); 56 | 57 | messages = init() 58 | marginals = Dict() 59 | data = Dict(:y => y_data) 60 | 61 | n_its = 4*n_samples 62 | for i = 1:n_its 63 | println("iteration $i") 64 | step!(data, marginals, messages) 65 | end 66 | 67 | 68 | using PyPlot 69 | 70 | clf() 71 | # Extract posterior statistics 72 | m_x = [mean(marginals[:x_*t]) for t = 1:n_samples] 73 | v_x = [var(marginals[:x_*t]) for t = 1:n_samples] 74 | 75 | plot(collect(1:n_samples), x_data, "k--", label="true x") 76 | plot(collect(1:n_samples), m_x, "b-", label="estimated x") 77 | fill_between(collect(1:n_samples), m_x-sqrt.(v_x), m_x+sqrt.(v_x), color="b", alpha=0.3); 78 | grid("on") 79 | xlabel("t") 80 | xlim(1, n_samples) 81 | ylim(-2, 2) 82 | legend(loc=7) 83 | 84 | ax = gca() 85 | ax[:twinx]() 86 | plot(collect(1:n_samples), y_data, "b*", label="y") 87 | yticks([0.0, 1.0], ["False", "True"]); 88 | ylim(-0.1, 1.1); 89 | gcf() 90 | -------------------------------------------------------------------------------- /Old/second-order.jl: -------------------------------------------------------------------------------- 1 | # Based on 2 | #https://github.com/sisl/algforopt-notebooks/blob/master/second-order.ipynb 3 | 4 | using LinearAlgebra 5 | 6 | include("first-order.jl") 7 | 8 | function _line_search(f, x, d) 9 | d = normalize(d) 10 | objective = α -> f(x + α*d) 11 | v, α = f(x), 1e-6 12 | while f(x + α*d) < v 13 | v = f(x + α*d) 14 | α += 1e-6 15 | end 16 | return x + α*d 17 | end 18 | 19 | #abstract type DescentMethod end 20 | mutable struct DFP <: DescentMethod 21 | Q::Matrix{Real} 22 | end 23 | DFP() = DFP(Matrix{Real}(undef, 0, 0)) 24 | function init!(M::DFP, f, ∇f, x) 25 | M.Q = Matrix(1.0I, length(x), length(x)) 26 | return M 27 | end 28 | function step!(M::DFP, f, ∇f, x) 29 | Q, g = M.Q, ∇f(x) 30 | x′ = _line_search(f, x, -Q*g) 31 | g′ = ∇f(x′) 32 | δ = x′ - x 33 | γ = g′ - g 34 | Q[:] = Q - Q*γ*γ'*Q/(γ'*Q*γ) + δ*δ'/(δ'*γ) 35 | return x′ 36 | end 37 | 38 | mutable struct BFGS <: DescentMethod 39 | Q::Matrix{Real} 40 | end 41 | BFGS() = BFGS(Matrix{Real}(undef, 0, 0)) 42 | function init!(M::BFGS, f, ∇f, x) 43 | M.Q = Matrix(1.0I, length(x), length(x)) 44 | return M 45 | end 46 | function step!(M::BFGS, f, ∇f, x) 47 | Q, g = M.Q, ∇f(x) 48 | 
x′ = _line_search(f, x, -Q*g) 49 | g′ = ∇f(x′) 50 | δ = x′ - x 51 | γ = g′ - g 52 | Q[:] = Q - (δ*γ'*Q + Q*γ*δ')/(δ'*γ) + (1 + (γ'*Q*γ)/(δ'*γ))[1]*(δ*δ')/(δ'*γ) 53 | return x′ 54 | end 55 | 56 | mutable struct LBFGS <: DescentMethod 57 | m::Int # memory length 58 | δs::Array{Vector{Real},1} 59 | γs::Array{Vector{Real},1} 60 | qs::Array{Vector{Real},1} 61 | end 62 | LBFGS(m) = LBFGS(m, Array{Vector{Real},1}(), Array{Vector{Real},1}(), Array{Vector{Real},1}()) 63 | function init!(M::LBFGS, f, ∇f, x) 64 | M.δs = [] 65 | M.γs = [] 66 | M.qs = [] 67 | return M 68 | end 69 | function step!(M::LBFGS, f, ∇f, x) 70 | δs, γs, qs, g = M.δs, M.γs, M.qs, ∇f(x) 71 | m = length(δs) 72 | if m > 0 73 | q = g 74 | for i in m : -1 : 1 75 | qs[i] = copy(q) 76 | q -= (δs[i]⋅q)/(γs[i]⋅δs[i])*γs[i] 77 | end 78 | z = (γs[m] .* δs[m] .* q) / (γs[m]⋅γs[m]) 79 | for i in 1 : m 80 | z += δs[i]*(δs[i]⋅qs[i] - γs[i]⋅z)/(γs[i]⋅δs[i]) 81 | end 82 | x′ = _line_search(f, x, -z) 83 | else 84 | x′ = _line_search(f, x, -g) 85 | end 86 | g′ = ∇f(x′) 87 | push!(δs, x′ - x); 88 | push!(γs, g′ - g) 89 | push!(qs, zeros(length(x))) 90 | while length(δs) > M.m 91 | popfirst!(δs); popfirst!(γs); popfirst!(qs) 92 | end 93 | return x′ 94 | end 95 | -------------------------------------------------------------------------------- /Old/tmp.jl: -------------------------------------------------------------------------------- 1 | # Examples of how to fit a binary logistic regression model 2 | 3 | #https://fluxml.ai/Flux.jl/v0.1.1/examples/logreg.html 4 | #http://marubon-ds.blogspot.com/2018/05/deep-learning-with-julia-introduction.html 5 | 6 | #https://github.com/FluxML/Flux.jl/blob/1cf37ab9eb806368d5702ab8053d5bc5b46180cf/src/layers/stateless.jl 7 | 8 | #https://github.com/FluxML/NNlib.jl/blob/master/src/logsoftmax.jl 9 | 10 | #https://www.linkedin.com/pulse/creating-deep-neural-network-model-learn-handwritten-digits-mike-gold/ 11 | 12 | 13 | import Flux 14 | 15 | 16 | function logreg_binary_sample(w::Array{Float64,1}, X::Array{Float64,2}) 17 | DX, N = size(X) 18 | DW = length(w) 19 | @assert DX == DW 20 | p = [Flux.sigmoid(sum(w .* X[:,i])) for i in 1:N] 21 | labels = p .< rand(N) 22 | #W = reshape(w, (1, DW)) 23 | #labels = (vec(Flux.sigmoid.(W * X)) .< rand(N)) 24 | return labels 25 | end 26 | 27 | 28 | function make_test_data_binary() 29 | Random.seed!(1) 30 | D = 4; 31 | N = 5; 32 | X = randn(D, N) # note that examples are stored in the columns 33 | w = randn(D) 34 | y = logreg_binary_sample(w, X) 35 | return w, X, y 36 | end 37 | 38 | function logreg_binary_nll(w, X, y) 39 | D, N = size(X) 40 | #p1 = [Flux.sigmoid(sum(w .* X[:,i])) for i in 1:N] 41 | W = reshape(w, (1, D)) 42 | p = vec(Flux.sigmoid.(W * X)) # 1xN->N vector 43 | #@assert isapprox(vec(p1), vec(p)) 44 | return -sum(y .* log.(p) + (1 .- y) .* log.(1 .- p)) / N 45 | end 46 | 47 | function logreg_binary_nll_test() 48 | w, X, y = make_test_data_binary() 49 | D, N = size(X) 50 | nll1 = logreg_binary_nll(w, X, y) 51 | W = reshape(w, (1, D)); 52 | logits = vec(W * X) 53 | nll2 = sum(Flux.logitbinarycrossentropy.(logits, y)) / N 54 | @test isapprox(nll1, nll2) 55 | end 56 | 57 | function logreg_binary_nll_grad(w, X, y) 58 | D, N = size(X) 59 | g = zeros(Float64,D) 60 | for i=1:N 61 | x = X[:,i] 62 | p = Flux.sigmoid(sum(w .* x)) 63 | g[:] = g[:] + (p - y[i]) .* x 64 | end 65 | return g / N 66 | end 67 | 68 | function logreg_binary_nll_grad_test() 69 | w0, X, y = make_test_data_binary() 70 | g1 = logreg_binary_nll_grad(w0, X, y) 71 | g2 = Flux.gradient(w -> logreg_binary_nll(w, 
X, y), w0)[1] 72 | @test isapprox(g1, g2) 73 | end 74 | 75 | 76 | import Optim 77 | function logreg_fit_test() 78 | W, X, Y = make_test_data() 79 | w0 = vec(randn(size(W))) 80 | objective(w) = logreg_nll(w, X, Y) 81 | result = Optim.optimize(objective, w0, Optim.LBFGS()) 82 | West = reshape(Optim.minimizer(result), (C,D)) 83 | ncalls = result.iterations 84 | end 85 | 86 | logreg_binary_nll_test() 87 | logreg_binary_nll_grad_test() 88 | -------------------------------------------------------------------------------- /FactorGraphs/tmp.jl: -------------------------------------------------------------------------------- 1 | # Bayes rule for a linear Gaussian model 2 | # X -> Y, observe Y, infer X 3 | # Y ~ N(A x + b, R) 4 | # X ~ N(mu, Q) 5 | 6 | # We consider the example from MLAPP sec 4.4.2.2 7 | # where A=I, b=0, mu=[0.5 0.5], Q=0.1I, R=0.1*[2 1; 1 1] 8 | #https://github.com/probml/pmtk3/blob/master/demos/gaussInferParamsMean2d.m 9 | 10 | using ForneyLab 11 | import LinearAlgebra:Diagonal 12 | using Distributions 13 | 14 | function make_model() 15 | g = FactorGraph() 16 | 17 | hidden_dim = 2 18 | obs_dim = 2 19 | Q = 0.1*eye(hidden_dim) 20 | mu_x = [0.5, 0.5] 21 | A = eye(2) 22 | b = [0.0, 0.0] 23 | R = 0.1 .* [2 1; 1 1] 24 | y_mean = A*mu_x + b 25 | y_dist = Distributions.MvNormal(y_mean, R) 26 | n_data = 10 27 | y_data_all = rand(y_dist, n_data) # n_data x obs_dim 28 | y_data = vec(mean(y_data_all, dims=2)) 29 | 30 | @RV x ~ GaussianMeanVariance(mu_x, Q) 31 | mu_y = A*x + b 32 | @RV y ~ GaussianMeanVariance(mu_y, R) 33 | placeholder(y, :y, dims=(obs_dim,)) # add clamping 34 | 35 | return x, y, y_data 36 | end 37 | 38 | x, y, y_data = make_model() 39 | algo = Meta.parse(sumProductAlgorithm(x)) 40 | eval(algo) # compile the step! function 41 | data = Dict(:y => y_data) 42 | marginals = step!(data); 43 | post = marginals[:x] 44 | post_gauss = Distributions.MvNormal(mean(post), cov(post)) 45 | prior_gauss = Distributions.MvNormal([0.5, 0.5], 0.1*eye(2)) 46 | 47 | using Plots; pyplot() 48 | xrange = -1.5:0.1:1.5 49 | yrange = xrange 50 | contour(xrange, yrange, (x,y)->Distributions.pdf(prior_gauss,[x,y]), reuse=false) 51 | title!("Prior") 52 | 53 | contour(xrange, yrange, (x,y)->Distributions.pdf(post_gauss,[x,y]), reuse=false) 54 | title!("Posterior") 55 | 56 | #= 57 | 58 | import ForneyLab 59 | const FL = ForneyLab 60 | import ForneyLab.@RV 61 | import LinearAlgebra 62 | 63 | function make_model() 64 | g = FL.FactorGraph() 65 | 66 | Q = 0.1*FL.eye(2) 67 | mu_x = [0.5, 0.5] 68 | A = FL.eye(2) 69 | b = [0.0, 0.0] 70 | R = 0.1 .* [2 1; 1 1] 71 | y_data = A*mu_x + b + randn(2) # Should sample using noise ~ R 72 | 73 | @RV x ~ FL.GaussianMeanVariance(mu_x, Q) 74 | #@RV obs_noise ~ GaussianMeanVariance(zeros(nobs), R) 75 | #@RV y = A*x + b + obs_noise 76 | mu_y = A*x + b 77 | @RV y ~ FL.GaussianMeanVariance(mu_y, R) 78 | FL.placeholder(y, :y, dims=(nobs,)) # add clamping 79 | 80 | return x, y, y_data 81 | end 82 | 83 | x, y, y_data = make_model() 84 | 85 | algo = Meta.parse(FL.sumProductAlgorithm(x)) 86 | eval(algo) # compile the step! 
function 87 | data = Dict(:y => y_data) 88 | marginals = step!(data); 89 | post = marginals[:x] 90 | =# 91 | -------------------------------------------------------------------------------- /FactorGraphs/kalman_filter.jl: -------------------------------------------------------------------------------- 1 | 2 | 3 | #https://github.com/biaslab/ForneyLab.jl/blob/master/demo/1_state_estimation_forward_only.ipynb 4 | # Kalman filtering 5 | 6 | #Data 7 | Random.seed!(1) 8 | n_samples = 20 9 | x = [t for t=1:n_samples] # True state 10 | y = x + sqrt(200.0)*randn(n_samples); # Noisy observations of state 11 | 12 | using ForneyLab 13 | 14 | g = FactorGraph() 15 | 16 | # declare priors as random variables 17 | @RV m_x_t_min # m_x_t_min = Variable(id=:m_x_t_min) 18 | @RV v_x_t_min # v_x_t_min = Variable(id=:v_x_t_min) 19 | @RV x_t_min ~ GaussianMeanVariance(m_x_t_min, v_x_t_min) 20 | 21 | # System equations 22 | # u = 1.0; v = 200.0 23 | @RV n_t ~ GaussianMeanVariance(0.0, 200.0) 24 | @RV x_t = x_t_min + 1.0 25 | @RV y_t = x_t + n_t 26 | 27 | # Name variable for ease of lookup 28 | #x_t.id = :x_t; 29 | 30 | # Placeholders for prior 31 | placeholder(m_x_t_min, :m_x_t_min) # placeholder(:m_x_t_min) does not work 32 | placeholder(v_x_t_min, :v_x_t_min) 33 | 34 | # Placeholder for data 35 | placeholder(y_t, :y_t); 36 | 37 | #ForneyLab.draw(g) 38 | 39 | algo = sumProductAlgorithm(x_t) # Figure out a schedule and compile to Julia code 40 | println(algo) 41 | # Define algorithm 42 | eval(Meta.parse(algo)) 43 | 44 | # Define values for prior statistics 45 | m_x_0 = 0.0 46 | v_x_0 = 1000.0 47 | 48 | m_x = Vector{Float64}(undef, n_samples) 49 | v_x = Vector{Float64}(undef, n_samples) 50 | 51 | 52 | m_x_t = m_x_0 53 | v_x_t = v_x_0 54 | for t = 1:n_samples 55 | # To allow assignment to global variables (in REPL scope), 56 | # we need to declare them: 57 | # https://github.com/JuliaLang/julia/issues/28789 58 | global m_x_t, v_x_t 59 | # Prepare data and prior statistics 60 | data = Dict(:y_t => y[t], 61 | :m_x_t_min => m_x_t, 62 | :v_x_t_min => v_x_t) 63 | 64 | # Execute algorithm 65 | marginals = step!(data) 66 | 67 | # Extract posterior statistics 68 | m_x_t = mean(marginals[:x_t]) 69 | v_x_t = var(marginals[:x_t]) 70 | 71 | # Store to buffer 72 | m_x[t] = m_x_t 73 | v_x[t] = v_x_t 74 | end 75 | 76 | 77 | #http://docs.juliaplots.org/latest/examples/pyplot/ 78 | #ys = Vector[rand(10), rand(20)] 79 | #plot(ys, color=[:black :orange], line=(:dot, 4), marker=([:hex :d], 12, 0.8, Plots.stroke(3, :gray))) 80 | 81 | #using PyPlot 82 | using Plots; pyplot() 83 | 84 | xs = 1:n_samples 85 | scatter(xs, y, color=[:blue], label="y") 86 | plot!(xs, x, color=[:black], label="x") 87 | plot!(xs, m_x, color=[:red], label="estimated x") 88 | #grid("on") 89 | ys = m_x; l = sqrt.(v_x); u = sqrt.(v_x) 90 | #fill_between(xs, y.-l, y.+_u, color="b", alpha=0.3) 91 | plot!([ys ys], fillrange=[ys.-l ys.+u], fillalpha=0.3, c=:red) 92 | xlabel!("t") 93 | #legend!(loc="upper left"); 94 | fname = "Figures/forney-kalman-filter.png" 95 | savefig(fname) 96 | 97 | 98 | #== 99 | plot(collect(1:n_samples), y, "b*", label="y") 100 | plot(collect(1:n_samples), x, "k--", label="true x") 101 | 102 | plot(collect(1:n_samples), m_x, "b-", label="estimated x") 103 | fill_between(collect(1:n_samples), m_x-sqrt.(v_x), m_x+sqrt.(v_x), color="b", alpha=0.3); 104 | grid("on") 105 | xlabel("t") 106 | legend(loc="upper left"); 107 | 108 | PyPlot.plot(collect(1:n_samples), y, "b*", label="y") 109 | PyPlot.plot(collect(1:n_samples), x, "k--", label="true x") 110 
| PyPlot.plot(collect(1:n_samples), m_x, "b-", label="estimated x") 111 | PyPlot.fill_between(collect(1:n_samples), m_x-sqrt.(v_x), m_x+sqrt.(v_x), color="b", alpha=0.3); 112 | PyPlot.grid("on") 113 | PyPlot.xlabel("t") 114 | PyPlot.legend(loc="upper left"); 115 | =# 116 | -------------------------------------------------------------------------------- /FactorGraphs/kalman_filter_1d.jl: -------------------------------------------------------------------------------- 1 | 2 | 3 | #https://github.com/biaslab/ForneyLab.jl/blob/master/demo/1_state_estimation_forward_only.ipynb 4 | # Kalman filtering 5 | 6 | #Data 7 | Random.seed!(1) 8 | n_samples = 20 9 | x = [t for t=1:n_samples] # True state 10 | y = x + sqrt(200.0)*randn(n_samples); # Noisy observations of state 11 | 12 | using ForneyLab 13 | 14 | g = FactorGraph() 15 | 16 | # declare priors as random variables 17 | @RV m_x_t_min # m_x_t_min = Variable(id=:m_x_t_min) 18 | @RV v_x_t_min # v_x_t_min = Variable(id=:v_x_t_min) 19 | @RV x_t_min ~ GaussianMeanVariance(m_x_t_min, v_x_t_min) 20 | 21 | # System equations 22 | # u = 1.0; v = 200.0 23 | @RV n_t ~ GaussianMeanVariance(0.0, 200.0) 24 | @RV x_t = x_t_min + 1.0 25 | @RV y_t = x_t + n_t 26 | 27 | # Name variable for ease of lookup 28 | #x_t.id = :x_t; 29 | 30 | # Placeholders for prior 31 | placeholder(m_x_t_min, :m_x_t_min) # placeholder(:m_x_t_min) does not work 32 | placeholder(v_x_t_min, :v_x_t_min) 33 | 34 | # Placeholder for data 35 | placeholder(y_t, :y_t); 36 | 37 | #ForneyLab.draw(g) 38 | 39 | algo = sumProductAlgorithm(x_t) # Figure out a schedule and compile to Julia code 40 | println(algo) 41 | # Define algorithm 42 | eval(Meta.parse(algo)) 43 | 44 | # Define values for prior statistics 45 | m_x_0 = 0.0 46 | v_x_0 = 1000.0 47 | 48 | m_x = Vector{Float64}(undef, n_samples) 49 | v_x = Vector{Float64}(undef, n_samples) 50 | 51 | 52 | m_x_t = m_x_0 53 | v_x_t = v_x_0 54 | for t = 1:n_samples 55 | # To allow assignment to global variables (in REPL scope), 56 | # we need to declare them: 57 | # https://github.com/JuliaLang/julia/issues/28789 58 | global m_x_t, v_x_t 59 | # Prepare data and prior statistics 60 | data = Dict(:y_t => y[t], 61 | :m_x_t_min => m_x_t, 62 | :v_x_t_min => v_x_t) 63 | 64 | # Execute algorithm 65 | marginals = step!(data) 66 | 67 | # Extract posterior statistics 68 | m_x_t = mean(marginals[:x_t]) 69 | v_x_t = var(marginals[:x_t]) 70 | 71 | # Store to buffer 72 | m_x[t] = m_x_t 73 | v_x[t] = v_x_t 74 | end 75 | 76 | 77 | #http://docs.juliaplots.org/latest/examples/pyplot/ 78 | #ys = Vector[rand(10), rand(20)] 79 | #plot(ys, color=[:black :orange], line=(:dot, 4), marker=([:hex :d], 12, 0.8, Plots.stroke(3, :gray))) 80 | 81 | #using PyPlot 82 | using Plots; pyplot() 83 | 84 | xs = 1:n_samples 85 | scatter(xs, y, color=[:blue], label="y") 86 | plot!(xs, x, color=[:black], label="x") 87 | plot!(xs, m_x, color=[:red], label="estimated x") 88 | #grid("on") 89 | ys = m_x; l = sqrt.(v_x); u = sqrt.(v_x) 90 | #fill_between(xs, y.-l, y.+_u, color="b", alpha=0.3) 91 | plot!([ys ys], fillrange=[ys.-l ys.+u], fillalpha=0.3, c=:red) 92 | xlabel!("t") 93 | #legend!(loc="upper left"); 94 | fname = "Figures/forney-kalman-filter.png" 95 | savefig(fname) 96 | 97 | 98 | #== 99 | plot(collect(1:n_samples), y, "b*", label="y") 100 | plot(collect(1:n_samples), x, "k--", label="true x") 101 | 102 | plot(collect(1:n_samples), m_x, "b-", label="estimated x") 103 | fill_between(collect(1:n_samples), m_x-sqrt.(v_x), m_x+sqrt.(v_x), color="b", alpha=0.3); 104 | grid("on") 105 | xlabel("t") 
106 | legend(loc="upper left"); 107 | 108 | PyPlot.plot(collect(1:n_samples), y, "b*", label="y") 109 | PyPlot.plot(collect(1:n_samples), x, "k--", label="true x") 110 | PyPlot.plot(collect(1:n_samples), m_x, "b-", label="estimated x") 111 | PyPlot.fill_between(collect(1:n_samples), m_x-sqrt.(v_x), m_x+sqrt.(v_x), color="b", alpha=0.3); 112 | PyPlot.grid("on") 113 | PyPlot.xlabel("t") 114 | PyPlot.legend(loc="upper left"); 115 | =# 116 | -------------------------------------------------------------------------------- /Old/utils.jl: -------------------------------------------------------------------------------- 1 | 2 | using Test, LinearAlgebra, Flux 3 | 4 | #https://discourse.julialang.org/t/psa-replacement-of-ind2sub-sub2ind-in-julia-0-7/14666/20 5 | function lin2cart(shape, indices) 6 | #return Tuple.(CartesianIndices(shape)[indices]) 7 | return [Base._ind2sub(Tuple(shape), i) for i in indices] 8 | end 9 | 10 | function cart2lin(shape, indices) 11 | #return LinearIndices(shape)[CartesianIndex.(indices)] 12 | return [Base._sub2ind(Tuple(shape), i...) for i in indices] 13 | end 14 | 15 | function normalize_probdist(probdist) 16 | s = sum(probdist) 17 | return probdist ./ s 18 | end 19 | 20 | # Source: 21 | # https://github.com/QuantEcon/QuantEcon.lectures.code/blob/master/kalman/gaussian_contours.jl 22 | function bivariate_normal(X::Matrix, Y::Matrix, σ_x::Real=1.0, σ_y::Real=1.0, 23 | μ_x::Real=0.0, μ_y::Real=0.0, σ_xy::Real=0.0) 24 | Xμ = X .- μ_x 25 | Yμ = Y .- μ_y 26 | ρ = σ_xy/(σ_x*σ_y) 27 | z = Xμ.^2/σ_x^2 + Yμ.^2/σ_y^2 - 2*ρ.*Xμ.*Yμ/(σ_x*σ_y) 28 | denom = 2π*σ_x*σ_y*sqrt(1-ρ^2) 29 | return exp.(-z/(2*(1-ρ^2))) ./ denom 30 | end 31 | 32 | 33 | # https://github.com/probml/pmtk3/blob/master/matlabTools/graphics/gaussPlot2d.m 34 | function plot_gauss2d(m, C) 35 | U = eigvecs(C) 36 | D = eigvals(C) 37 | N = 100 38 | t = range(0, stop=2*pi, length=N) 39 | xy = zeros(Float64, 2, N) 40 | xy[1,:] = cos.(t) 41 | xy[2,:] = sin.(t) 42 | #k = sqrt(6) # approx sqrt(chi2inv(0.95, 2)) = 2.45 43 | k = 1.0 44 | w = (k * U * Diagonal(sqrt.(D))) * xy # 2*N 45 | #Plots.scatter!([m[1]], [m[2]], marker=:star, label="") 46 | handle = Plots.plot!(w[1,:] .+ m[1], w[2,:] .+ m[2], label="") 47 | return handle 48 | end 49 | 50 | function plot_gauss2d_test() 51 | m = [0.0, 0.0] 52 | #C = randn(2,2); C = C*C'; 53 | C = [1.0 0.0; 0.0 3.0]; 54 | @test isposdef(C) 55 | Plots.scatter([m[1]], [m[2]], marker=:star) 56 | plot_2dgauss(m, C) 57 | end 58 | 59 | #Compute scalar-valued function fn(x) and its derivative using Flux's 60 | #revere mode AD. 61 | function fun_and_grad(fn, x) 62 | # Based on https://github.com/tpapp/LogDensityProblems.jl/blob/master/src/AD_Flux.jl 63 | y, back = Flux.Tracker.forward(fn, x) 64 | yval = Flux.Tracker.data(y) 65 | g = first(Flux.Tracker.data.(back(1))) 66 | return yval, g 67 | end 68 | 69 | function fun_and_grad_test() 70 | f(x) = x[1]^2 + 100*x[2]^2 71 | g(x) = [2*x[1], 200*x[2]] 72 | x = [1,1] 73 | y1 = f(x) 74 | g1 = g(x) 75 | y2, g2 = fun_and_grad(f, x) 76 | @test isapprox(y1, y2) 77 | @test isapprox(g1, g2) 78 | end 79 | 80 | 81 | 82 | #http://julianlsolvers.github.io/Optim.jl/stable/#user/minimization/ 83 | # The minimum value of 0.0 is at (a,a^2). 
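# f(x) = (a - x[1])^2 + b*(x[2] - x[1]^2)^2, so the gradient implemented below is
#   df/dx1 = -2*(a - x[1]) - 4*b*(x[2] - x[1]^2)*x[1]
#   df/dx2 =  2*b*(x[2] - x[1]^2)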
84 | function rosenbrock(x; a=1.0, b=100.0) 85 | return (a - x[1])^2 + b * (x[2] - x[1]^2)^2 86 | end 87 | 88 | function rosenbrock_grad!(g, x; a=1.0, b=100.0) 89 | g[1] = -2.0 * (a-x[1]) -4*b*(x[2] - x[1]^2) * x[1] 90 | g[2] = 2.0 * b * (x[2] - x[1]^2) 91 | end 92 | 93 | function rosenbrock_grad(x; a=1.0, b=100.0) 94 | g = Vector{Float64}(undef, 2) 95 | rosenbrock_grad!(g, x, a=a, b=b) 96 | return g 97 | end 98 | 99 | function rosenbrock_hessian(x; a=1.0, b=100.0) 100 | H = zeros(2,2) 101 | H[1,1] = 2 + 8*b * x[2]; 102 | H[1,2] = -4 * b * x[1]; 103 | H[2,1] = H[1,2]; 104 | H[2,2] = 2 * b; 105 | return H 106 | end 107 | 108 | function rosenbrock_condition() 109 | H = rosenbrock_hessian([1,1]) 110 | println("condition number = $(cond(H))") 111 | evals = eigvals(H) 112 | @assert isapprox(cond(H), evals[2]/evals[1]) 113 | end 114 | 115 | function rosenbrock_grad_test() 116 | x = randn(2) 117 | y, g = fun_and_grad(rosenbrock, x) 118 | @test isapprox(y, rosenbrock(x)) 119 | @test isapprox(g, rosenbrock_grad(x)) 120 | g2 = Tracker.gradient(rosenbrock, x)[1] 121 | @test isapprox(g, g2) 122 | end 123 | -------------------------------------------------------------------------------- /Old/kalman_tracking_demo.jl: -------------------------------------------------------------------------------- 1 | 2 | # https://github.com/probml/pmtk3/blob/bb64a174c4ded561cfdba26d835e1aaa7221c368/demos/kalmanTrackingDemo.m 3 | 4 | 5 | using LinearAlgebra, Test 6 | import Distributions 7 | include("kalman_qe.jl") 8 | 9 | function make_params() 10 | F = [1 0 1 0; 0 1 0 1; 0 0 1 0; 0 0 0 1.0]; 11 | H = [1.0 0 0 0; 0 1 0 0]; 12 | nobs, nhidden = size(H) 13 | Q = Matrix(I, nhidden, nhidden) .* 0.001 14 | R = Matrix(I, nobs, nobs) .* 0.1 # 1.0 15 | mu0 = [8, 10, 1, 0.0]; 16 | V0 = Matrix(I, nhidden, nhidden) .* 1.0 17 | params = (mu0 = mu0, V0 = V0, F = F, H = H, Q = Q, R = R) 18 | return params 19 | end 20 | 21 | 22 | # Returns xs: H*T, ys: O*T 23 | function kalman_sample(kn::Kalman, T::Int) 24 | nobs, nhidden = size(kn.G) 25 | xs = Array{Float64}(undef, nhidden, T) 26 | ys = Array{Float64}(undef, nobs, T) 27 | mu0 = kn.cur_x_hat 28 | V0 = kn.cur_sigma 29 | prior_z = Distributions.MvNormal(mu0, V0) 30 | process_noise_dist = Distributions.MvNormal(kn.Q) 31 | obs_noise_dist = Distributions.MvNormal(kn.R) 32 | xs[:,1] = rand(prior_z) 33 | ys[:,1] = kn.G*zs[:,1] + rand(obs_noise_dist) 34 | for t=2:T 35 | xs[:,t] = kn.A*xs[:,t-1] + rand(process_noise_dist) 36 | ys[:,t] = kn.G*xs[:,t] + rand(obs_noise_dist) 37 | end 38 | return xs, ys 39 | end 40 | 41 | 42 | function kalman_filter(kn::Kalman, y::AbstractMatrix) 43 | obs_size, T = size(y) 44 | n = size(kn.G, 2) 45 | @assert n == kn.n 46 | x_filtered = Matrix{Float64}(undef, n, T) 47 | sigma_filtered = Array{Float64}(undef, n, n, T) 48 | sigma_forecast = Array{Float64}(undef, n, n, T) 49 | logL = 0 50 | for t in 1:T 51 | logL = logL + log_likelihood(kn, y[:, t]) 52 | prior_to_filtered!(kn, y[:, t]) 53 | x_filtered[:, t], sigma_filtered[:, :, t] = kn.cur_x_hat, kn.cur_sigma 54 | filtered_to_forecast!(kn) 55 | sigma_forecast[:, :, t] = kn.cur_sigma 56 | end 57 | return x_filtered, logL, sigma_filtered, sigma_forecast 58 | end 59 | 60 | function kalman_smoother(kn::Kalman, y::AbstractMatrix) 61 | T = size(y, 2) 62 | x_filtered, logL, sigma_filtered, sigma_forecast = kalman_filter(kn, y) 63 | x_smoothed = copy(x_filtered) 64 | sigma_smoothed = copy(sigma_filtered) 65 | for t in (T-1):-1:1 66 | x_smoothed[:, t], sigma_smoothed[:, :, t] = 67 | go_backward(kn, x_filtered[:, t], 
sigma_filtered[:, :, t], 68 | sigma_forecast[:, :, t], x_smoothed[:, t+1], 69 | sigma_smoothed[:, :, t+1]) 70 | end 71 | return x_smoothed, logL, sigma_smoothed 72 | end 73 | 74 | include("utils.jl") 75 | function do_plot(zs, ys, m, V) 76 | # m is H*T, V is H*H*T, where H=4 hidden states 77 | plt = scatter(ys[1,:], ys[2,:], label="observed", reuse=false) 78 | plt = scatter!(zs[1,:], zs[2,:], label="true", marker=:star) 79 | xlims!(minimum(ys[1,:])-1, maximum(ys[1,:])+1) 80 | ylims!(minimum(ys[2,:])-1, maximum(ys[2,:])+1) 81 | display(plt) 82 | m2 = m[1:2,:] 83 | V2 = V[1:2, 1:2, :] 84 | T = size(m2, 2) 85 | for t=1:T 86 | plt = plot_gauss2d(m2[:,t], V2[:,:,t]) 87 | end 88 | display(plt) 89 | end 90 | 91 | 92 | Random.seed!(2) 93 | T = 10 94 | params = make_params() 95 | F = params.F; H = params.H; Q = params.Q; R = params.R; mu0 = params.mu0; V0 = params.V0; 96 | kf = Kalman(F, H, Q, R) 97 | (zs, ys) = kalman_sample(kf, T) # H*T, O*T 98 | println("inference") 99 | set_state!(kf, mu0, V0) 100 | mF, loglik, VF = kalman_filter(kf, ys) 101 | set_state!(kf, mu0, V0) 102 | mS, loglik, VS = kalman_smoother(kf, ys) 103 | #m, V = kalman_smoother(params, ys) 104 | 105 | println("plotting") 106 | using Plots; pyplot() 107 | closeall() 108 | do_plot(zs, ys, mF, VF); title!("Filtering") 109 | do_plot(zs, ys, mS, VS); title!("Smoothing") 110 | -------------------------------------------------------------------------------- /Old/zigzag.jl: -------------------------------------------------------------------------------- 1 | # Reproduce fig 5.1 from A4O book to illustrate zigzag effect 2 | # of gradient descent with linesearch 3 | 4 | using Vec 5 | using LinearAlgebra 6 | 7 | include("utils.jl") 8 | include("first-order.jl") 9 | 10 | 11 | f(x) = rosenbrock(x, a=1, b=5) 12 | ∇f(x) = rosenbrock_grad(x, a=1, b=5) 13 | ftuple = (x,y) -> f([x,y]) 14 | xdomain = range(-2, stop=2, length=100) 15 | ydomain = range(-2, stop=2, length=100) 16 | x0 = [-1.0, -1.0] 17 | levels=[1,2,3,5,10,20,50,100] 18 | 19 | 20 | N = 50 21 | expts = Tuple{DescentMethod, String}[] 22 | push!(expts, (GradientDescentLineSearchExact(), "exact-linesearch")) 23 | push!(expts, (GradientDescentLineSearch(), "backtracking-linesearch")) 24 | push!(expts, (GradientDescent(0.01), "GD-0.01")) 25 | push!(expts, (GradientDescent(0.05), "GD-0.05")) 26 | push!(expts, (GradientDescent(0.055), "GD-0.055")) 27 | 28 | using Plots 29 | pyplot() 30 | for (M, name) in expts 31 | println("using method $name") 32 | plt = contour(xdomain, ydomain, (x,y)->f([x,y]), levels=levels, reuse=false) 33 | scatter!([x0[1]], [x0[2]], marker=:circle, markercolor=:black, 34 | markersize=5, label="start") 35 | scatter!([1.0], [1.0], marker=:star, markercolor=:red, 36 | markersize=5, label="opt") 37 | 38 | pts = [x0] 39 | ftrace = [f(x0)] 40 | function cb(M, fn, grad, x, iter) 41 | y = fn(x) 42 | g = grad(x) 43 | gnorm = norm(g) 44 | #println("$name, iter=$iter, f=$y, gnorm=$gnorm") 45 | if abs(y)>1e10 || abs(gnorm)>1e10 || abs(gnorm) < 1e-2; return true; end 46 | push!(pts, x) 47 | push!(ftrace, y) 48 | return false 49 | end 50 | xfinal = run_descent_method(M, f, ∇f, x0; max_iter=N, callback=cb) 51 | xtrace = hcat(pts...) 
# 2xN 52 | #println(ftrace) 53 | plot!(xtrace[1,:], xtrace[2,:], label=name, linecolor=:red, width=2) 54 | display(plt) 55 | savefig(plt, "Figures/zigzag-$name.pdf") 56 | #plt = plot(ftrace, label=name; yaxis=:log) 57 | #display(plt) 58 | end 59 | 60 | 61 | 62 | #= 63 | # Version using arrays 64 | fn(x) = rosenbrock(x, a=1, b=5) 65 | dfn(x) = rosenbrock_grad(x, a=1, b=5) 66 | xdomain = range(-2, stop=2, length=100) 67 | ydomain = range(-2, stop=2, length=100) 68 | x0 = [-1.0, -1.0] 69 | levels=[1,2,3,5,10,20,50,100] 70 | 71 | pts = [x0] 72 | for i in 1 : 10 73 | xcur = pts[end] 74 | g = dfn(xcur) 75 | d = -normalize(g) 76 | f1d = a -> begin 77 | xnew = xcur + a*d 78 | g2 = dfn(xnew) 79 | v1 = dot(g2, d) / norm(d) 80 | return v1 81 | end 82 | alpha = secant_method(f1d, 0.0, 1.0, 0.0001) 83 | push!(pts, xcur + alpha*d) 84 | end 85 | using Plots 86 | pyplot() 87 | plt = contour(xdomain, ydomain, (x,y) -> fn([x,y]), levels=levels, reuse=false) 88 | scatter!([x0[1]], [x0[2]], marker=:star, markercolor=:red, markersize=5) 89 | xtrace = hcat(pts...) # 2xN 90 | plot!(xtrace[1,:], xtrace[2,:]) 91 | display(plt) 92 | =# 93 | 94 | 95 | #= 96 | # Version using Vec (tuples) 97 | # rosenbrock(a=1, b=5) 98 | f = (x,y) -> (1-x)^2 + 5*(y - x^2)^2 99 | df = (x,y) -> [2*(10*x^3-10*x*y+x-1), 10*(y-x^2)] 100 | xdomain = range(-2, stop=2, length=100) 101 | ydomain = range(-2, stop=2, length=100) 102 | p0 = (-1, -1) 103 | 104 | pts2d = Tuple{Float64,Float64}[p0] 105 | for i in 1 : 10 106 | x,y = pts2d[end] 107 | dp = normalize(-VecE2{Float64}(df(x, y)...)) 108 | println("dp $dp") 109 | f1d = a -> begin 110 | x2 = x + a*dp.x 111 | y2 = y + a*dp.y 112 | 113 | da = df(x2, y2) 114 | pa = VecE2{Float64}(da[1], da[2]) 115 | 116 | pp = proj(pa, dp, Float64) 117 | println("da=$da, pp=$pp") 118 | return pp 119 | end 120 | alpha = secant_method(f1d, 0.0, 1.0, 0.0001) 121 | push!(pts2d, (x + alpha*dp.x, y + alpha*dp.y)) 122 | end 123 | using Plots 124 | pyplot() 125 | plt = Plots.contour(xdomain, ydomain, f, levels=[1,2,3,5,10,20,50,100], reuse=false) 126 | Plots.plot!([p[1] for p in pts2d], [p[2] for p in pts2d], label="GD with line search") 127 | display(plt) 128 | =# 129 | -------------------------------------------------------------------------------- /FactorGraphs/lin_probit_regression_corrupted_classlabels.jl: -------------------------------------------------------------------------------- 1 | # Tested with Julia v1.1.0 and ForneyLab v0.9.1 2 | 3 | using LinearAlgebra, PyPlot, ForneyLab 4 | import Distributions: MvNormal 5 | 6 | # Custom message update rule to interpret integers as Categorical Distributions 7 | # with one-hot coding. 
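# Concretely, the rule defined below takes an observed label y in {1, 2}, encodes it
# as a one-hot vector p, pushes it through the label-flipping matrix (a = A'p), and
# returns the normalized result as a Categorical message over the uncorrupted label.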
8 | @sumProductRule(:node_type => Transition, 9 | :outbound_type => Message{Categorical}, 10 | :inbound_types => (Message{PointMass}, Nothing, Message{PointMass}), 11 | :name => SPTransitionIn1PVP) 12 | 13 | function ruleSPTransitionIn1PVP(msg_out::Message{PointMass, Univariate}, 14 | msg_in1::Nothing, 15 | msg_a::Message{PointMass, MatrixVariate}) 16 | # Convert integer in {1,2} to Categorical with probability vector p 17 | p = zeros(2) 18 | p[msg_out.dist.params[:m]] = 1.0 19 | a = msg_a.dist.params[:m]'*p 20 | 21 | Message(Univariate, Categorical, p=a./sum(a)) 22 | end 23 | 24 | # EP algorithm building 25 | function build_algorithm(dim::Int, N::Int64, alpha::Float64; output_file="ep_algo.jl") 26 | fg = FactorGraph() 27 | A = [1.0-alpha alpha; alpha 1.0-alpha] # label flipping probability matrix 28 | @RV w ~ GaussianMeanVariance(constant(zeros(dim)), constant(100 * eye(dim)), id=:w) 29 | sites = Variable[w] 30 | for t=1:N 31 | x_t = placeholder(Variable(id=:x_*t), :x, index=t, dims=(dim,)) 32 | @RV dp_t ~ DotProduct(w, x_t, id=:dp_*t) 33 | @RV z_t ~ Sigmoid(dp_t, id=:z_*t) # uncorrupted label 34 | @RV y_t ~ Transition(z_t, constant(A, id=:A_*t), id=:y_*t) # corrupted label 35 | placeholder(y_t, :y, index=t, datatype=Int) 36 | push!(sites, dp_t) 37 | end 38 | 39 | # ForneyLab.draw(fg) 40 | ep_schedule = expectationPropagationSchedule(sites) 41 | messagePassingAlgorithm(ep_schedule, marginalSchedule(w), file=output_file) 42 | end 43 | 44 | # Synthetic data generation 45 | function generate_data(N::Int, α::Float64) 46 | X = zeros(3, N) 47 | y = Vector{Int64}(undef, N) 48 | dists = [MvNormal([-1.0; -0.75], 0.5*eye(2)); MvNormal([1.25; 1.0], 0.3*diagm(0 => [1.0; 1.5]))] 49 | for i=1:N 50 | y[i] = rand([1;2]) 51 | X[1:2,i] = rand(dists[y[i]]) 52 | X[3,i] = 1.0 53 | if rand() < α # Flip class label 54 | y[i] = (y[i] == 1) ? 
2 : 1 55 | end 56 | end 57 | 58 | return X, y 59 | end 60 | 61 | # Plotting 62 | function plot_state(X, y, β_dist=false) 63 | figure() 64 | scatter(X[1,:], X[2,:], c=float(y).-1.0) 65 | # PyPlot.gray() 66 | 67 | if isa(β_dist, ProbabilityDistribution) 68 | # σ(β[1]*x[1] + β[2]*x[2] + β[3]) = 0.5 69 | # ⇛ β[1]*x[1] + β[2]*x[2] + β[3] = 0 70 | # ⇛ x[2] = - β[1]/β[2]*x[1] - β[3]/β[2] 71 | # = a * x[1] + b 72 | x1 = [minimum(X[1,:]); maximum(X[1,:])] 73 | for i=1:100 74 | β = ForneyLab.sample(β_dist) 75 | a,b = [-1*β[1]; -1*β[3]] ./ β[2] 76 | plot(x1, a*x1.+b, "-", color="k", alpha=0.2) 77 | end 78 | 79 | xlim(x1) 80 | ylim([minimum(X[2,:]); maximum(X[2,:])]) 81 | PyPlot.xticks([]) 82 | PyPlot.yticks([]) 83 | end 84 | # savefig("../figures/lin_class_posterior.eps") 85 | end 86 | 87 | # Generate training data 88 | N = 100 89 | α = 0.2 # Label corruption probability 90 | # Random.seed!(1234) 91 | X_train, y_train = generate_data(N, α) 92 | # plot_state(X_train, y_train) 93 | 94 | # Build inference algorithm 95 | # Comment the following line to skip regeneration of the algorithm 96 | build_algorithm(3, N, α, output_file="ep_algo.jl") 97 | 98 | data = Dict( 99 | :x => [X_train[:,t] for t=1:N], 100 | :y => y_train) 101 | 102 | # Perform inference 103 | include("ep_algo.jl") 104 | messages = init() 105 | marginals = Dict() 106 | for t=1:20 107 | println("Step $t") 108 | step!(data, marginals, messages) 109 | end 110 | println("DONE!") 111 | println(marginals) 112 | 113 | # Plot result 114 | plot_state(X_train, y_train, marginals[:w]) 115 | savefig("lin_class_posterior_robust.pdf") 116 | PyPlot.show() 117 | -------------------------------------------------------------------------------- /FactorGraphs/vb_gmm.jl: -------------------------------------------------------------------------------- 1 | 2 | # Variational Bayes for a 1d GMM 3 | 4 | #https://github.com/biaslab/ForneyLab.jl/blob/master/demo/4_variational_estimation_gaussian_mixture.ipynb 5 | 6 | # Generate toy data set 7 | 8 | 9 | import Distributions: Normal, pdf 10 | using Random 11 | 12 | Random.seed!(238) 13 | n = 50 14 | y_data = Float64[] 15 | pi_data = 0.3 16 | d1 = Normal(-2., 0.75) 17 | d2 = Normal(1.0, 0.75) 18 | z_data = rand(n) .< pi_data 19 | for i = 1:n 20 | push!(y_data, z_data[i] ? 
rand(d1) : rand(d2)) 21 | end 22 | 23 | using ForneyLab 24 | 25 | g = FactorGraph() 26 | 27 | # Specify generative model 28 | @RV _pi ~ Beta(1.0, 1.0) 29 | @RV m_1 ~ GaussianMeanVariance(0.0, 100.0) 30 | @RV w_1 ~ Gamma(0.01, 0.01) 31 | @RV m_2 ~ GaussianMeanVariance(0.0, 100.0) 32 | @RV w_2 ~ Gamma(0.01, 0.01) 33 | 34 | z = Vector{Variable}(undef, n) 35 | y = Vector{Variable}(undef, n) 36 | for i = 1:n 37 | @RV z[i] ~ Bernoulli(_pi) 38 | @RV y[i] ~ GaussianMixture(z[i], m_1, w_1, m_2, w_2) 39 | 40 | placeholder(y[i], :y, index=i) 41 | end 42 | 43 | # Specify recognition factorization (mean-field) 44 | q = RecognitionFactorization(_pi, m_1, w_1, m_2, w_2, z, ids=[:PI, :M1, :W1, :M2, :W2, :Z]) 45 | 46 | # Generate the algorithm 47 | println("generating codewords") 48 | algo = variationalAlgorithm(q) 49 | algo_F = freeEnergyAlgorithm(q); 50 | 51 | # Load algorithms 52 | println("compiling code") 53 | eval(Meta.parse(algo)) 54 | eval(Meta.parse(algo_F)); 55 | 56 | data = Dict(:y => y_data) 57 | 58 | # Prepare recognition distributions 59 | marginals = Dict(:_pi => vague(Beta), 60 | :m_1 => ProbabilityDistribution(Univariate, GaussianMeanVariance, m=-1.0, v=1e4), 61 | :w_1 => vague(Gamma), 62 | :m_2 => ProbabilityDistribution(Univariate, GaussianMeanVariance, m=1.0, v=1e4), 63 | :w_2 => vague(Gamma)) 64 | for i = 1:n 65 | marginals[:z_*i] = vague(Bernoulli) 66 | end 67 | 68 | # Execute algorithm 69 | n_its = 10 70 | F = Float64[] 71 | for i = 1:n_its 72 | println("inference step $i") 73 | stepZ!(data, marginals) 74 | stepPI!(data, marginals) 75 | stepM1!(data, marginals) 76 | stepW1!(data, marginals) 77 | stepM2!(data, marginals) 78 | stepW2!(data, marginals) 79 | 80 | # Store variational free energy for visualization 81 | push!(F, freeEnergy(data, marginals)) 82 | end 83 | 84 | using PyPlot 85 | 86 | # Plot free energy to check for convergence 87 | figure(figsize=(8,4)) 88 | subplot(211) 89 | plot(1:n_its, F, color="black") 90 | grid("on") 91 | xlabel("VMP iteration") 92 | ylabel("Variational free energy") 93 | gcf() 94 | 95 | # Plot data 96 | subplot(212) 97 | scatter(y_data[z_data], -0.25*ones(sum(z_data)), color="blue", linewidth=2, marker=".") 98 | scatter(y_data[.!z_data], -0.25*ones(sum(.!z_data)), color="orange", linewidth=2, marker=".") 99 | 100 | # Plot estimated distribution 101 | x_test = range(-4, stop=4, length=200) 102 | d1_est = Normal(mean(marginals[:m_1]), sqrt(var(marginals[:m_1]))) 103 | d2_est = Normal(mean(marginals[:m_2]), sqrt(var(marginals[:m_2]))) 104 | pi_est = mean(marginals[:_pi]) 105 | gmm_pdf = pi_est * pdf.(Ref(d1_est), x_test) + (1-pi_est) * pdf.(Ref(d2_est), x_test) 106 | plot(x_test, gmm_pdf, color="k") 107 | xlabel(L"y") 108 | ylabel(L"p(y|\mathcal{D})") 109 | xlim([-4,4]) 110 | grid() 111 | tight_layout() 112 | gcf() 113 | 114 | 115 | #== 116 | using Plots; pyplot() 117 | 118 | # Plot free energy to check for convergence 119 | plot(1:n_its, F, c=:black, lab="Free energy") 120 | xlabel!("Iteration") 121 | ylabel!("F") 122 | 123 | # Plot data, superimpose fitted models 124 | scatter(y_data[z_data], -0.25*ones(sum(z_data)), color=:blue, markersize=6) 125 | scatter!(y_data[.!z_data], -0.25*ones(sum(.!z_data)), color=:orange, markersize=6) 126 | x_test = range(-4, stop=4, length=200) 127 | d1_est = Normal(mean(marginals[:m_1]), sqrt(var(marginals[:m_1]))) 128 | d2_est = Normal(mean(marginals[:m_2]), sqrt(var(marginals[:m_2]))) 129 | pi_est = mean(marginals[:_pi]) 130 | gmm_pdf = pi_est * pdf.(Ref(d1_est), x_test) + (1-pi_est) * pdf.(Ref(d2_est), x_test) 131 | 
plot!(x_test, gmm_pdf, color=:black) 132 | xlabel!(L"y") 133 | str = "\$p(y|\\mathcal{D})\$"; ylabel!(str) 134 | xlims!((-4,4)) 135 | grid!() 136 | ==# 137 | -------------------------------------------------------------------------------- /FactorGraphs/gauss-bayes-rule-2d.jl: -------------------------------------------------------------------------------- 1 | # Bayes rule for a linear Gaussian model 2 | # X -> Y, observe Y, infer X 3 | # Y ~ N(A x + b, Sigma_y) 4 | # X ~ N(mu_x, Sigma_x) 5 | 6 | # We consider the example from "Machine learning: a probabilistic perspective" 7 | # (Murphy, 2012) sec 4.4.2.2. 8 | # where A=I, b=0, mu=[0.5 0.5], Sigma_x=0.1I, Sigma_y=0.1*[2 1; 1 1] 9 | # Corresponding Matlab code is here: 10 | # https://github.com/probml/pmtk3/blob/master/demos/gaussInferParamsMean2d.m 11 | # Figures should look like this 12 | # http://people.cs.ubc.ca/~murphyk/MLbook/figReport-16-Aug-2012/pdfFigures/gauss2dupdatePostSubplot.pdf 13 | 14 | using ForneyLab 15 | using LinearAlgebra, Distributions, Test 16 | 17 | function make_data(params, n_data) 18 | y_mean = params.A * params.true_x + params.b 19 | y_dist = Distributions.MvNormal(y_mean, params.Sigma_y) 20 | y_data = rand(y_dist, n_data) # n_data x obs_dim 21 | return y_data 22 | end 23 | 24 | function make_factor_graph(params) 25 | g = FactorGraph() 26 | @RV x ~ GaussianMeanVariance(params.mu_x, params.Sigma_x) 27 | mu_y = params.A * x + params.b 28 | @RV y ~ GaussianMeanVariance(mu_y, params.Sigma_y) 29 | placeholder(y, :y, dims=(length(params.b),)) 30 | fg = (x = x, y = y) 31 | return fg 32 | end 33 | 34 | function make_fg_inference(fg) 35 | algo = Meta.parse(sumProductAlgorithm(fg.x)) 36 | eval(algo) # compile the step! function 37 | function infer(y_data) 38 | data = Dict(fg.y.id => y_data) 39 | marginals = step!(data); 40 | post = marginals[:x] 41 | post_gauss = Distributions.MvNormal(mean(post), cov(post)) 42 | return post_gauss 43 | end 44 | return infer 45 | end 46 | 47 | function bayes_rule_lin_gauss(params, y) 48 | # Implements eqn 4.125 of MLAPP 49 | A = params.A 50 | At = permutedims(A) 51 | b = params.b 52 | prior_prec = inv(params.Sigma_x) 53 | prior_mean = params.mu_x 54 | obs_prec = inv(params.Sigma_y) 55 | post_prec = prior_prec + At * obs_prec * A 56 | post_cov = inv(post_prec) 57 | post_mean = post_cov*(At * obs_prec * (y-b) + prior_prec * prior_mean) 58 | post_gauss = Distributions.MvNormal(post_mean, post_cov) 59 | return post_gauss 60 | end 61 | 62 | #= 63 | params = Dict( 64 | :true_x => [0.5, 0.5], 65 | :mu_x => [0.0, 0.0], 66 | :Sigma_x => 0.1 * eye(2), 67 | :A => eye(2), 68 | :b => [0.0, 0.0], 69 | :Sigma_y => 0.1 .* [2 1; 1 1] 70 | ) 71 | =# 72 | 73 | params_gen = 74 | (true_x = [0.5, 0.5], 75 | mu_x = [0.0, 0.0], 76 | Sigma_x = 0.1 * eye(2), 77 | A = eye(2), 78 | b = [0.0, 0.0], 79 | Sigma_y = 0.1 .* [2 1; 1 1] 80 | ) 81 | 82 | n_data = 10 83 | Random.seed!(1) 84 | y_data_all = make_data(params_gen, n_data) 85 | y_data_mean = vec(mean(y_data_all, dims=2)) 86 | 87 | params = 88 | (true_x = [0.5, 0.5], 89 | mu_x = [0.0, 0.0], 90 | Sigma_x = 0.1 * eye(2), 91 | A = eye(2), 92 | b = [0.0, 0.0], 93 | Sigma_y = 0.1 .* [2 1; 1 1] / n_data # since using average of observations 94 | ) 95 | fg = make_factor_graph(params) 96 | infer = make_fg_inference(fg) 97 | post_gauss = infer(y_data_mean) 98 | post_gauss2 = bayes_rule_lin_gauss(params, y_data_mean) 99 | @test isapprox(mean(post_gauss), mean(post_gauss2)) 100 | @test isapprox(cov(post_gauss), cov(post_gauss2)) 101 | 102 | using Plots; pyplot() 103 | 
closeall() 104 | xrange = -1.5:0.01:1.5 105 | yrange = xrange 106 | plt = scatter(y_data_all[1,:], y_data_all[2,:], label="obs", reuse=false) 107 | scatter!([params.true_x[1]], [params.true_x[2]], label="truth") 108 | xlims!(minimum(xrange), maximum(xrange)) 109 | ylims!(minimum(xrange), maximum(xrange)) 110 | title!("Data") 111 | savefig("Figures/gauss-bayes-rule-2d-data.png") 112 | display(plt) 113 | 114 | prior_gauss = Distributions.MvNormal(params.mu_x, params.Sigma_x) 115 | plt = contour(xrange, yrange, (x,y)->Distributions.pdf(prior_gauss,[x,y]), 116 | reuse=false, title="prior") 117 | savefig("Figures/gauss-bayes-rule-2d-prior.png") 118 | display(plt) 119 | 120 | plt = contour(xrange, yrange, (x,y)->Distributions.pdf(post_gauss,[x,y]), 121 | reuse=false, title = "Posterior") 122 | savefig("Figures/gauss-bayes-rule-2d-post.png") 123 | display(plt) 124 | -------------------------------------------------------------------------------- /Old/flux-demo.jl: -------------------------------------------------------------------------------- 1 | #https://fluxml.ai/Flux.jl/stable/models/basics/ 2 | using Flux.Tracker 3 | using Flux 4 | 5 | f(x) = 3x^2 + 2x + 1 6 | 7 | # df/dx = 6x + 2 8 | df(x) = Tracker.gradient(f, x)[1] 9 | 10 | df(2) # 14.0 (tracked) 11 | 12 | # d²f/dx² = 6 13 | d2f(x) = Tracker.gradient(df, x; nest=true)[1] 14 | 15 | # This fails: known bug 16 | # https://github.com/FluxML/Flux.jl/issues/566 17 | #d2f(2) # 6.0 (tracked) 18 | 19 | f(W, b, x) = W * x + b 20 | 21 | Tracker.gradient(f, 2, 3, 4) 22 | #(4.0 (tracked), 1.0, 2.0 (tracked)) 23 | 24 | W = param(2) # 2.0 (tracked) 25 | b = param(3) # 3.0 (tracked) 26 | 27 | f(x) = W * x + b 28 | 29 | params = Params([W, b]) 30 | grads = Tracker.gradient(() -> f(4), params) 31 | 32 | grads[W] # 4.0 df/dW = x = 4 33 | grads[b] # 1.0 df/db = 1 34 | 35 | 36 | f2(W, b, x) = W * x + b 37 | W = param(2) 38 | b = param(3) 39 | x = param(4) 40 | params2 = Params([W, b, x]) 41 | grads2 = Tracker.gradient(() -> f2, params2) 42 | 43 | 44 | ### 45 | Random.seed!(1234) 46 | W = rand(2, 5) 47 | b = rand(2) 48 | 49 | predict(x) = W*x .+ b 50 | 51 | function loss(x, y) 52 | ŷ = predict(x) 53 | sum((y .- ŷ).^2) 54 | end 55 | 56 | x, y = rand(5), rand(2) # Dummy data 57 | loss(x, y) # ~ 2.8 58 | 59 | 60 | W = param(W) 61 | b = param(b) 62 | 63 | gs = Tracker.gradient(() -> loss(x, y), Params([W, b])) 64 | 65 | using Flux.Tracker: update! 
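# The next few lines perform one hand-rolled gradient-descent step,
# W <- W - 0.1 * dloss/dW, using the gradient stored in gs[W].
# Repeating that step gives a minimal training loop; a sketch, assuming the
# same loss, x, y and tracked parameters W, b defined above:
#
#   for step = 1:100
#       gs = Tracker.gradient(() -> loss(x, y), Params([W, b]))
#       update!(W, -0.1 * gs[W])
#       update!(b, -0.1 * gs[b])
#   end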
66 | 67 | Δ = gs[W] 68 | 69 | # Update the parameter and reset the gradient 70 | update!(W, -0.1Δ) 71 | 72 | loss(x, y) # ~ 1.4 73 | 74 | function linear(in, out) 75 | W = param(randn(out, in)) 76 | b = param(randn(out)) 77 | x -> W * x .+ b 78 | end 79 | 80 | linear1 = linear(5, 3) # we can access linear1.W etc 81 | linear2 = linear(3, 2) 82 | 83 | model(x) = linear2(σ.(linear1(x))) 84 | 85 | model(rand(5)) # => 2-element vector 86 | 87 | ############ 88 | 89 | nin = 5; nhid = 2; 90 | W = param(rand(nhid, nin)) 91 | b = param(rand(nhid)) 92 | f(x, W, b) = sum(W*x .+ b) 93 | x = rand(nin) 94 | f(x, W, b) 95 | g = Tracker.gradient(() -> f(x,W,b), Params([x,W,b])) 96 | 97 | 98 | nin = 3 99 | wp = param([-1,0,1]) 100 | bp = param(0.0) 101 | f(x, w, b) = sum( w .*x + b) 102 | xin = 1:3 103 | f(xin, wp, bp) 104 | g = Tracker.gradient(() -> f(xin, wp, bp), Params([wp,bp])) 105 | 106 | ### 107 | 108 | #https://stackoverflow.com/questions/49135107/logistic-regression-using-flux-jl 109 | 110 | 111 | using GLM, DataFrames, Flux.Tracker 112 | 113 | Random.seed!(1) 114 | n = 100 115 | df = DataFrame(s1=rand(n), s2=rand(n)) 116 | df[:y] = rand(n) .< 1 ./ (1 .+ exp.(-(1 .+ 2 .* df[1] .+ 0.5 .* df[2]))) 117 | model2 = glm(@formula(y~s1+s2), df, Binomial(), LogitLink()) 118 | 119 | x = Matrix(df[1:2]) 120 | y = df[3] 121 | N=10; x = rand(N,2); y = rand([false,true], N) 122 | W = param(rand(2,1)) 123 | b = param(rand(1)) 124 | predict(x) = 1.0 ./ (1.0 .+ exp.(-x*W .- b)) 125 | loss(x,y) = -sum(log.(predict(x[y,:]))) - sum(log.(1 .- predict(x[.!y,:]))) 126 | 127 | function update!(ps, η = .01) 128 | for w in ps 129 | w.data .-= w.grad .* η 130 | w.grad .= 0 131 | end 132 | end 133 | 134 | maxiter = 10 135 | for iter=1:maxiter 136 | back!(loss(x,y)) 137 | if max(maximum(abs.(W.grad)), abs(b.grad[1])) < 0.01 138 | break 139 | end 140 | update!((W, b)) 141 | end 142 | 143 | ################ 144 | # Least squares regression 145 | 146 | N = 10; D = 2; 147 | Random.seed!(1) 148 | X = rand(N, D) 149 | X1 = [ones(N) X] 150 | X1t = transpose(X1) 151 | wtrue = rand(D+1) 152 | yobs = X1 * wtrue 153 | loss(w) = sum((X1 * w - yobs) .^2) 154 | 155 | # OLS solution 156 | # http://web.stanford.edu/class/ee103/julia_slides/julia_least_squares_slides.pdf 157 | west1 = inv(X1t * X1) * X1t * yobs 158 | west2 = X1 \ yobs 159 | #using LinearAlgebra 160 | #F = qr(X1) 161 | #@test isapprox(X1, F.Q * F.R) 162 | #west3 = inv(F.R)*(F.Q'*yobs) 163 | 164 | # Gradient method 165 | wp = param(rand(D+1)) 166 | gradloss(wp) = (back!(loss(wp)); wp.grad) 167 | dloss(w) = 2*(X1t * X1 * w - X1t * ytrue) 168 | @test isapprox(gradloss(wp), dloss(wp), 1e-2) 169 | 170 | 171 | using Optim 172 | x0 = zeros(2) 173 | result1 = optimize(rosenbrock, x0, LBFGS(m=5)) 174 | sol1 = Optim.minimizer(result1) 175 | ncalls1 = result1.iterations 176 | 177 | result2 = optimize(rosenbrock, x0, LBFGS(m=5); autodiff = :forward) 178 | sol2 = Optim.minimizer(result2) 179 | ncalls2 = result2.iterations 180 | 181 | result3 = optimize(rosenbrock, rosenbrock_grad!, x0, LBFGS(m=5)) 182 | sol3 = Optim.minimizer(result3) 183 | ncalls3 = result3.iterations 184 | -------------------------------------------------------------------------------- /FactorGraphs/structured_mean_field.jl: -------------------------------------------------------------------------------- 1 | 2 | # Mean field vs structured mean field for a 1d Gaussian SSM with unknown process noise variance 3 | #https://github.com/biaslab/ForneyLab.jl/blob/master/demo/7_structured_variational_estimation.ipynb 4 | 5 | 
Random.seed!(1) 6 | n_samples = 100 7 | v_data = 1.0 8 | w_data = 10.0 9 | s_0_data = 0.0 10 | 11 | s_data = Vector{Float64}(undef, n_samples) 12 | x_data = Vector{Float64}(undef, n_samples) 13 | s_t_min_data = s_0_data 14 | for t = 1:n_samples 15 | global s_t_min_data 16 | s_data[t] = s_t_min_data + sqrt(1/w_data)*randn() # Evolution model 17 | x_data[t] = s_data[t] + sqrt(v_data)*randn() # Observation model 18 | 19 | s_t_min_data = s_data[t] 20 | end 21 | ; 22 | 23 | using ForneyLab 24 | 25 | g = FactorGraph() 26 | 27 | @RV w ~ Gamma(0.01, 0.01) 28 | @RV s_0 ~ GaussianMeanVariance(0.0, 100.0) 29 | 30 | s = Vector{Variable}(undef, n_samples) 31 | x = Vector{Variable}(undef, n_samples) 32 | s_t_min = s_0 33 | for t = 1:n_samples 34 | global s_t_min 35 | @RV s[t] ~ GaussianMeanPrecision(s_t_min, w) 36 | @RV x[t] ~ GaussianMeanVariance(s[t], v_data) 37 | 38 | s_t_min = s[t] 39 | 40 | placeholder(x[t], :x, index=t) 41 | end 42 | ; 43 | 44 | 45 | # Define recognition factorization 46 | RecognitionFactorization() 47 | 48 | q_w = RecognitionFactor(w) 49 | q_s_0 = RecognitionFactor(s_0) 50 | 51 | q_s = Vector{RecognitionFactor}(undef, n_samples) 52 | for t=1:n_samples 53 | q_s[t] = RecognitionFactor(s[t]) 54 | end 55 | 56 | println("generate code") 57 | algo_w_mf = variationalAlgorithm(q_w, name="WMF") 58 | algo_s_mf = variationalAlgorithm([q_s_0; q_s], name="SMF") 59 | algo_F_mf = freeEnergyAlgorithm(name="MF"); 60 | 61 | 62 | println("compilinig") 63 | eval(Meta.parse(algo_w_mf)) 64 | eval(Meta.parse(algo_s_mf)) 65 | eval(Meta.parse(algo_F_mf)) 66 | 67 | # Initialize data 68 | data = Dict(:x => x_data) 69 | n_its = 40 70 | 71 | # Initial recognition distributions 72 | marginals_mf = Dict{Symbol, ProbabilityDistribution}(:w => vague(Gamma)) 73 | for t = 0:n_samples 74 | marginals_mf[:s_*t] = vague(GaussianMeanPrecision) 75 | end 76 | 77 | # Run algorithm 78 | F_mf = Vector{Float64}(undef, n_its) 79 | for i = 1:n_its 80 | println("iteration $i") 81 | stepWMF!(data, marginals_mf) 82 | stepSMF!(data, marginals_mf) 83 | 84 | F_mf[i] = freeEnergyMF(data, marginals_mf) 85 | end 86 | ; 87 | 88 | # Define the recognition factorization 89 | q_struct = RecognitionFactorization(w, [s_0; s]; ids=[:WStruct, :SStruct]) 90 | 91 | println("generate code") 92 | algo_struct = variationalAlgorithm(q_struct) 93 | algo_F_struct = freeEnergyAlgorithm(name="Struct") 94 | 95 | 96 | println("compile code") 97 | eval(Meta.parse(algo_struct)) 98 | eval(Meta.parse(algo_F_struct)) 99 | 100 | # Initial recognition distributions 101 | marginals_struct = Dict{Symbol, ProbabilityDistribution}(:w => vague(Gamma)) 102 | 103 | # Run algorithm 104 | F_struct = Vector{Float64}(undef, n_its) 105 | for i = 1:n_its 106 | println("iteration $i") 107 | stepSStruct!(data, marginals_struct) 108 | stepWStruct!(data, marginals_struct) 109 | 110 | F_struct[i] = freeEnergyStruct(data, marginals_struct) 111 | end 112 | ; 113 | 114 | using PyPlot 115 | 116 | # Collect state estimates 117 | m_s_mf = [mean(marginals_mf[:s_*t]) for t=0:n_samples] 118 | v_s_mf = [var(marginals_mf[:s_*t]) for t=0:n_samples] 119 | m_s_struct = [mean(marginals_struct[:s_*t]) for t=0:n_samples] 120 | v_s_struct = [var(marginals_struct[:s_*t]) for t=0:n_samples] 121 | 122 | # Plot estimated state 123 | figure(figsize=(8,4)) 124 | #subplot(211) 125 | plot(collect(1:n_samples), x_data, "*", label="Data", color="black") 126 | plot(collect(0:n_samples), [0.0; s_data], "k-", label="True hidden state") 127 | plot(collect(0:n_samples), m_s_struct, "b-", label="Structured state 
estimate") 128 | fill_between(collect(0:n_samples), m_s_struct-sqrt.(v_s_struct), m_s_struct+sqrt.(v_s_struct), color="b", alpha=0.3); 129 | plot(collect(0:n_samples), m_s_mf, "r-", label="Mean-field state estimate") 130 | fill_between(collect(0:n_samples), m_s_mf-sqrt.(v_s_mf), m_s_mf+sqrt.(v_s_mf), color="r", alpha=0.3); 131 | grid("on") 132 | xlabel("Time") 133 | legend(loc="upper left", bbox_to_anchor=[-0.05, 1.5]); 134 | gcf() 135 | 136 | #subplot(212) 137 | clf() 138 | plot(1:n_its, F_struct, color="blue", label="Structured") 139 | plot(1:n_its, F_mf, color="red", label="Mean-field") 140 | grid("on") 141 | legend() 142 | yscale("log") 143 | xlabel("VMP iteration") 144 | ylabel("Variational free energy") 145 | tight_layout(); 146 | gcf() 147 | -------------------------------------------------------------------------------- /Old/logreg-opt-demo.jl: -------------------------------------------------------------------------------- 1 | # Examples of how to fit a logistic regression model 2 | 3 | #https://fluxml.ai/Flux.jl/v0.1.1/examples/logreg.html 4 | #http://marubon-ds.blogspot.com/2018/05/deep-learning-with-julia-introduction.html 5 | 6 | #https://github.com/FluxML/Flux.jl/blob/1cf37ab9eb806368d5702ab8053d5bc5b46180cf/src/layers/stateless.jl 7 | 8 | #https://github.com/FluxML/NNlib.jl/blob/master/src/logsoftmax.jl 9 | 10 | #https://www.linkedin.com/pulse/creating-deep-neural-network-model-learn-handwritten-digits-mike-gold/ 11 | 12 | 13 | import Flux 14 | 15 | 16 | """ 17 | logreg_sample(W, X) 18 | 19 | Sample from a logistic regression model with C classes and D features. 20 | 21 | Inputs: 22 | `W`: C*D matrix of weights. 23 | `X`: D*N matrix of feature vectors, one example per column. 24 | 25 | Outputs: 26 | `labels`: N-vector of integer labels from {1,...,C}. 27 | """ 28 | function logreg_sample(W::Array{Float64,2}, X::Array{Float64,2}) 29 | C, DW = size(W) 30 | DX, N = size(X) 31 | @assert DW == DX 32 | labels = Array{Int}(undef, N) 33 | # Binary case 34 | #y = Flux.logistic.(X * w) .< rand(N) 35 | for i=1:N 36 | probs = Flux.softmax(W * X[:,i]) 37 | dist = Distributions.Categorical(probs) 38 | labels[i] = rand(dist) 39 | end 40 | return labels 41 | end 42 | # @code_warntype logreg_sample(W,X) # LGTM 43 | 44 | 45 | function make_test_data() 46 | Random.seed!(1) 47 | C = 3; 48 | D = 4; 49 | N = 5; 50 | X = randn(D, N) # note that examples are stored in the columns 51 | W = randn(C, D) 52 | Y_cat = logreg_sample(W, X) 53 | Y_onehot = Flux.onehotbatch(Y_cat, 1:C) # C*N matrix of bools 54 | @test Y_cat[1] == findfirst(x -> x, Y_onehot[:,1]) 55 | return W, X, Y_onehot 56 | end 57 | 58 | """ 59 | logreg_nll_matrix(W, X, Y) 60 | 61 | Compute average negative log likelihood for a logistic regression model. 62 | 63 | Inputs: 64 | `W`: C*D matrix of weights. 65 | `X`: D*N matrix of feature vectors, one example per column. 66 | `Y`: C*N one hot matrix of labels. 
67 | 68 | Outputs: 69 | nll: -1/N * sum_{n=1}^N sum_{c=1}^C Y[c,n] * log(Pr[c,n]) 70 | where Pr[:,n] = softmax(W*X[:,n]) 71 | """ 72 | function logreg_nll_matrix(W::Array{Float64,2}, X::Array{Float64,2}, 73 | Y::Flux.OneHotMatrix{Array{Flux.OneHotVector,1}}) 74 | CW, DW = size(W) 75 | DX, NX = size(X) 76 | CY, NY = size(Y) 77 | @assert DW == DX 78 | @assert CY == CW 79 | @assert N == NY 80 | nll = 0.0 81 | for i=1:NX 82 | x = X[:,i] 83 | y = Y[:,i] 84 | probs = Flux.softmax(W*x) 85 | nll += -sum(y .* log.(probs)) # PMLv1 p255 86 | end 87 | return nll / N 88 | end 89 | 90 | 91 | 92 | """ 93 | logreg_nll(w, X, Y) 94 | 95 | Compute average negative log likelihood for a logistic regression model. 96 | 97 | Inputs: 98 | `w`: C*D 1D vector of weights, c changing fastest. 99 | `X`: D*N matrix of feature vectors, one example per column. 100 | `Y`: C*N one hot matrix of labels. 101 | 102 | Outputs: 103 | nll: -1/N * sum_{n=1}^N sum_{c=1}^C Y[c,n] * log(Pr[c,n]) 104 | where Pr[:,n] = softmax(W*X[:,n]) 105 | """ 106 | function logreg_nll(w::Array{Float64,1}, X::Array{Float64,2}, 107 | Y::Flux.OneHotMatrix{Array{Flux.OneHotVector,1}}) 108 | D, NX = size(X) 109 | C, NY = size(Y) 110 | @assert NX == NY 111 | W = reshape(w, (C, D)) 112 | return logreg_nll_matrix(W, X, Y) 113 | end 114 | 115 | 116 | function logreg_nll_test() 117 | W, X, Y = make_test_data() 118 | nll = logreg_nll(vec(W), X, Y) 119 | logits = W*X # CxD 120 | nll2 = Flux.logitcrossentropy(logits, Y) 121 | @test isapprox(nll, nll2) 122 | end 123 | 124 | function logreg_nll_grad(w, X, Y) 125 | D, NX = size(X) 126 | C, NY = size(Y) 127 | @assert NX == NY 128 | W = reshape(w, (C, D)) 129 | G = zeros(Float64, C, D) 130 | for i=1:NX 131 | x = X[:,i] 132 | y = Y[:,i] 133 | probs = Flux.softmax(W*x) 134 | delta = probs - y 135 | for c=1:C 136 | G[c,:] += delta[c] .* x 137 | end 138 | end 139 | return vec(G) 140 | end 141 | 142 | function logreg_nll_grad_test() 143 | W0, X, Y = make_test_data() 144 | w0 = vec(W0) 145 | g = logreg_nll_grad(w0, X, Y) 146 | g2 = Flux.gradient(w -> logreg_nll(w, X, Y), w0) 147 | @test isapprox(g, g2) 148 | end 149 | 150 | import Optim 151 | function logreg_fit_test() 152 | W, X, Y = make_test_data() 153 | w0 = vec(randn(size(W))) 154 | objective(w) = logreg_nll(w, X, Y) 155 | result = Optim.optimize(objective, w0, Optim.LBFGS()) 156 | West = reshape(Optim.minimizer(result), (C,D)) 157 | ncalls = result.iterations 158 | end 159 | 160 | 161 | #logreg_fit_test() 162 | #logreg_sample_test() 163 | #logreg_nll_test() 164 | #logreg_nll_grad_test() 165 | -------------------------------------------------------------------------------- /FactorGraphs/hmm.jl: -------------------------------------------------------------------------------- 1 | 2 | # Variational Bayes for a discrete observation HMM 3 | 4 | #https://github.com/biaslab/ForneyLab.jl/blob/master/demo/8_hidden_markov_model_estimation.ipynb 5 | 6 | using ForneyLab 7 | 8 | n_samples = 20 9 | A_data = [0.9 0.0 0.1; 0.1 0.9 0.0; 0.0 0.1 0.9] # Transition probabilities (some transitions are impossible) 10 | B_data = [0.9 0.05 0.05; 0.05 0.9 0.05; 0.05 0.05 0.9] # Observation noise 11 | 12 | s_0_data = [1.0, 0.0, 0.0] # Initial state 13 | 14 | # Generate some data 15 | Random.seed!(1) 16 | s_data = Vector{Vector{Float64}}(undef, n_samples) # one-hot encoding of the states 17 | x_data = Vector{Vector{Float64}}(undef, n_samples) # one-hot encoding of the observations 18 | s_t_min_data = s_0_data 19 | for t = 1:n_samples 20 | global s_t_min_data 21 | a = A_data*s_t_min_data 22 | 
s_data[t] = sample(ProbabilityDistribution(Categorical, p=a./sum(a))) # Simulate state transition 23 | b = B_data*s_data[t] 24 | x_data[t] = sample(ProbabilityDistribution(Categorical, p=b./sum(b))) # Simulate observation 25 | 26 | s_t_min_data = s_data[t] 27 | end 28 | ; 29 | 30 | g = FactorGraph() 31 | 32 | @RV A ~ Dirichlet(ones(3,3)) # Vague prior on transition model 33 | @RV B ~ Dirichlet([10.0 1.0 1.0; 1.0 10.0 1.0; 1.0 1.0 10.0]) # Stronger prior on observation model 34 | @RV s_0 ~ Categorical(1/3*ones(3)) 35 | 36 | s = Vector{Variable}(undef, n_samples) # one-hot coding 37 | x = Vector{Variable}(undef, n_samples) # one-hot coding 38 | s_t_min = s_0 39 | for t = 1:n_samples 40 | global s_t_min 41 | @RV s[t] ~ Transition(s_t_min, A) 42 | @RV x[t] ~ Transition(s[t], B) 43 | 44 | s_t_min = s[t] 45 | 46 | placeholder(x[t], :x, index=t, dims=(3,)) 47 | end; 48 | 49 | # Define the recognition factorization 50 | q = RecognitionFactorization(A, B, [s_0; s], ids=[:A, :B, :S]) 51 | 52 | # Generate VMP algorithm 53 | algo = variationalAlgorithm(q) 54 | 55 | # Construct variational free energy evaluation code 56 | algo_F = freeEnergyAlgorithm(q); 57 | 58 | # Load algorithms 59 | eval(Meta.parse(algo)) 60 | eval(Meta.parse(algo_F)) 61 | 62 | # Initial recognition distributions 63 | marginals = Dict{Symbol, ProbabilityDistribution}( 64 | :A => vague(Dirichlet, (3,3)), 65 | :B => vague(Dirichlet, (3,3))) 66 | 67 | # Initialize data 68 | data = Dict(:x => x_data) 69 | n_its = 20 70 | 71 | # Run algorithm 72 | F = Vector{Float64}(undef, n_its) 73 | for i = 1:n_its 74 | stepS!(data, marginals) 75 | stepB!(data, marginals) 76 | stepA!(data, marginals) 77 | 78 | F[i] = freeEnergy(data, marginals) 79 | end 80 | ; 81 | 82 | using PyPlot 83 | 84 | # Plot free energy 85 | clf() 86 | plot(1:n_its, F, color="black") 87 | grid("on") 88 | xlabel("Iteration") 89 | ylabel("Free Energy") 90 | xlim(0,n_its); 91 | gcf() 92 | 93 | figure(figsize=(10,5)) 94 | # Collect state estimates 95 | x_obs = [findfirst(x_i.==1.0) for x_i in x_data] 96 | s_true = [findfirst(s_i.==1.0) for s_i in s_data] 97 | 98 | # Plot simulated state trajectory and observations 99 | clf() 100 | subplot(211) 101 | plot(1:n_samples, x_obs, "k*", label="Observations x", markersize=7) 102 | plot(1:n_samples, s_true, "k--", label="True state s") 103 | yticks([1.0, 2.0, 3.0], ["Red", "Green", "Blue"]) 104 | grid("on") 105 | xlabel("Time") 106 | legend(loc="upper left") 107 | xlim(0,n_samples) 108 | ylim(0.9,3.1) 109 | title("Data set and true state trajectory") 110 | gcf() 111 | 112 | # Plot inferred state sequence 113 | subplot(212) 114 | m_s = [mean(marginals[:s_*t]) for t=1:n_samples] 115 | m_s_1 = [m_s_t[1] for m_s_t in m_s] 116 | m_s_2 = [m_s_t[2] for m_s_t in m_s] 117 | m_s_3 = [m_s_t[3] for m_s_t in m_s] 118 | 119 | fill_between(1:n_samples, zeros(n_samples), m_s_1, color="red") 120 | fill_between(1:n_samples, m_s_1, m_s_1 + m_s_2, color="green") 121 | fill_between(1:n_samples, m_s_1 + m_s_2, ones(n_samples), color="blue") 122 | xlabel("Time") 123 | ylabel("State belief") 124 | grid("on") 125 | title("Inferred state trajectory"); 126 | gcf() 127 | 128 | # True state transition probabilities 129 | clf() 130 | PyPlot.plt[:matshow](A_data, vmin=0.0, vmax=1.0) 131 | ttl = title("True state transition probabilities") 132 | ttl[:set_position]([.5, 1.15]) 133 | yticks([0, 1, 2], ["Red", "Green", "Blue"]) 134 | xticks([0, 1, 2], ["Red", "Green", "Blue"], rotation="vertical") 135 | colorbar() 136 | gcf() 137 | 138 | # Inferred state transition 
probabilities 139 | PyPlot.plt[:matshow](mean(marginals[:A]), vmin=0.0, vmax=1.0) 140 | ttl = title("Inferred state transition probabilities") 141 | ttl[:set_position]([.5, 1.15]) 142 | yticks([0, 1, 2], ["Red", "Green", "Blue"]) 143 | xticks([0, 1, 2], ["Red", "Green", "Blue"], rotation="vertical") 144 | colorbar(); 145 | gcf() 146 | -------------------------------------------------------------------------------- /Old/logreg-binary-opt-demo.jl: -------------------------------------------------------------------------------- 1 | # Examples of how to fit a binary logistic regression model using various methods. 2 | # We compute gradient analytically or using flux. 3 | # We use GLM package as a unit test of correctness. 4 | # https://stackoverflow.com/questions/49135107/logistic-regression-using-flux-jl 5 | 6 | 7 | using Flux 8 | 9 | function logreg_binary_predict(w, X) 10 | DX, N = size(X) 11 | DW = length(w) 12 | @assert DX == DW 13 | logits = [sum(w .* X[:,i]) for i in 1:N] 14 | probs = [Flux.sigmoid(l) for l in logits] 15 | #W = reshape(w, (1, D)) 16 | #probs = vec(Flux.sigmoid.(W * X)) # 1xN->N vector 17 | labels = [(p > 0.5 ? 1 : 0) for p in probs] 18 | return probs, labels, logits 19 | end 20 | 21 | function logreg_binary_sample(w, X) 22 | probs = logreg_binary_predict(w, X)[1] 23 | D, N = size(X) 24 | labels = probs .< rand(N) 25 | return labels 26 | end 27 | 28 | function make_test_data_binary(;N=20, D=2) 29 | Random.seed!(1) 30 | X = randn(D, N) # note that examples are stored in the columns 31 | w = randn(D) 32 | y = logreg_binary_sample(w, X) 33 | return w, X, y 34 | end 35 | 36 | function logreg_binary_nll(w, X, y) 37 | D, N = size(X) 38 | p = logreg_binary_predict(w, X)[1] 39 | return -sum(y .* log.(p) + (1 .- y) .* log.(1 .- p)) / N 40 | end 41 | 42 | function logreg_binary_nll_test() 43 | w, X, y = make_test_data_binary() 44 | D, N = size(X) 45 | nll1 = logreg_binary_nll(w, X, y) 46 | probs, labels, logits = logreg_binary_predict(w, X) 47 | nll2 = sum(Flux.logitbinarycrossentropy.(logits, y)) / N 48 | @test isapprox(nll1, nll2) 49 | end 50 | 51 | function logreg_binary_nll_grad(w, X, y) 52 | D, N = size(X) 53 | g = zeros(Float64,D) 54 | for i=1:N 55 | x = X[:,i] 56 | p = Flux.sigmoid(sum(w .* x)) 57 | g[:] += (p - y[i]) .* x 58 | end 59 | return g / N 60 | end 61 | 62 | function logreg_binary_nll_grad_test() 63 | w0, X, y = make_test_data_binary() 64 | g1 = logreg_binary_nll_grad(w0, X, y) 65 | g2 = Flux.gradient(w -> logreg_binary_nll(w, X, y), w0)[1] 66 | @test isapprox(g1, g2) 67 | end 68 | 69 | import Optim 70 | function logreg_binary_fit_bfgs(X, y; w0=nothing, max_iter=100) 71 | D, N = size(X) 72 | if isnothing(w0); w0 = randn(D); end 73 | objective(w) = logreg_binary_nll(w, X, y) 74 | grad(w) = logreg_binary_nll_grad(w, X, y) 75 | solver = Optim.LBFGS(;linesearch = Optim.BackTracking()) 76 | opts = Optim.Options(iterations = max_iter) 77 | result = Optim.optimize(objective, grad, w0, solver, opts; inplace=false) 78 | w_est = result.minimizer 79 | return w_est 80 | end 81 | 82 | include("first-order.jl") 83 | function logreg_binary_fit_adam(X, y; w0=nothing, max_iter=100) 84 | D, N = size(X) 85 | if isnothing(w0); w0 = randn(D); end 86 | objective(w) = logreg_binary_nll(w, X, y) 87 | grad(w) = logreg_binary_nll_grad(w, X, y) 88 | solver = Adam(0.2) 89 | w_est = run_descent_method(solver, objective, grad, w0; max_iter=max_iter) 90 | return w_est 91 | end 92 | 93 | function logreg_binary_fit_flux(X, y; w0=nothing, max_iter=100) 94 | D, N = size(X) 95 | #initW(out, in) = 
reshape(w0, (1,D)) 96 | #https://github.com/FluxML/Flux.jl/issues/332 97 | #model = Chain(Dense(D, 1; initW = initW), sigmoid) 98 | #model = Chain(Dense(D, 1), sigmoid) 99 | model = Chain(Dense(D, 2), softmax) 100 | #loss(x, y) = Flux.binarycrossentropy(model(x), y) 101 | loss(x, y) = crossentropy(model(x), y) 102 | Y = onehotbatch(y, 0:1) 103 | data = (X,Y) 104 | callback() = @show(loss(X, Y)) 105 | opt = Flux.Optimise.ADAM() 106 | for epoch=1:max_iter 107 | Flux.train!(loss, params(model), data, opt, cb = callback) 108 | end 109 | end 110 | 111 | #= 112 | D = 2; N = 20; 113 | X = randn(D, N) 114 | y = rand([0,1], N) 115 | D, N = size(X) 116 | data = repeated((X,Y),1) 117 | 118 | #model = Chain(Dense(D, 1), sigmoid) # DimensionMismatch("multiplicative identity defined only for square matrices") 119 | #model = Dense(D, 1, sigmoid) #LoadError: no method matching eps(::TrackedArray{…,Array{Float32,2}}) 120 | #loss(x, y) = Flux.binarycrossentropy(model(x), y) 121 | #Y = y 122 | model = Chain(Dense(D, 2), softmax) 123 | #model = Dense(D, 2, softmax) # MethodError: no method matching similar(::Float32) 124 | loss(x, y) = crossentropy(model(x), y) 125 | Y = onehotbatch(y, 0:1) 126 | 127 | callback() = @show(loss(X, Y)) 128 | opt = Flux.Optimise.ADAM() 129 | Flux.train!(loss, params(model), data, opt, cb = callback) 130 | =# 131 | 132 | #= 133 | #https://github.com/FluxML/model-zoo/blob/master/vision/mnist/mlp.jl 134 | 135 | imgs = MNIST.images() #60k-vector, imgs[1] is 28x28 matrix of Grayscale 136 | # Stack images into one large batch 137 | XX = hcat(float.(reshape.(imgs, :))...) # 784x60k of float 138 | 139 | labels = MNIST.labels() 140 | Y = onehotbatch(labels, 0:9) # 10k60k OneHotMatrix 141 | =# 142 | 143 | 144 | using GLM, DataFrames 145 | #https://discourse.julialang.org/t/how-to-fit-a-glm-to-all-unnamed-features-of-arbitrary-design-matrix/20490/3 146 | function logreg_binary_fit_glm(X, y) 147 | D, N = size(X) 148 | XT = permutedims(X) 149 | model = fit(GeneralizedLinearModel, XT, y, Bernoulli()) 150 | #df_y = DataFrame(y[:, :], [:yname]) 151 | #df_x_names = [Symbol("x$i") for i in 1:size(Xt)[2]] 152 | #df_x = DataFrame(Xt, df_x_names) 153 | #df = hcat(df_y, df_x) 154 | #lhs = :yname 155 | # include all variables :x1 to :xD but exclude intercept (hence -1) 156 | #rhs = Expr(:call, :+, -1, df_x_names...) 
157 | #model = glm(@eval(@formula($(lhs) ~ $(rhs))), df, Bernoulli(), LogitLink()) 158 | return coef(model) 159 | end 160 | 161 | function logreg_binary_fit_test() 162 | wtrue, X, y = make_test_data_binary(;D=2) 163 | D, N = size(X) 164 | Random.seed!(1) 165 | winit = randn(D) 166 | wopt = logreg_binary_fit_glm(X, y) 167 | ll_opt = logreg_binary_nll(wopt, X, y) 168 | println("glm: ll $ll_opt, w $wopt") 169 | expts = Tuple{Any, String}[] 170 | push!(expts, (logreg_binary_fit_bfgs, "bfgs")) 171 | push!(expts, (logreg_binary_fit_adam, "adam")) 172 | push!(expts, (logreg_binary_fit_flux, "flux")) 173 | for (solver, method_name) in expts 174 | west = solver(X, y, w0=winit, max_iter=100) 175 | ll = logreg_binary_nll(west, X, y) 176 | println("$method_name, ll $ll, w $west") 177 | #@test isapprox(west, wopt; atol=0.01) 178 | #@test isapprox(ll, ll_opt; atol=0.1) 179 | end 180 | end 181 | 182 | logreg_binary_nll_test() 183 | logreg_binary_nll_grad_test() 184 | logreg_binary_fit_test() 185 | -------------------------------------------------------------------------------- /Old/logreg-binary-opt-old.jl: -------------------------------------------------------------------------------- 1 | # Examples of how to fit a binary logistic regression model using various methods. 2 | # We compute gradient analytically or using flux. 3 | # We use GLM package as a unit test of correctness. 4 | # https://stackoverflow.com/questions/49135107/logistic-regression-using-flux-jl 5 | 6 | 7 | using Flux 8 | 9 | function logreg_binary_predict(w, X) 10 | DX, N = size(X) 11 | DW = length(w) 12 | @assert DX == DW 13 | logits = [sum(w .* X[:,i]) for i in 1:N] 14 | probs = [Flux.sigmoid(l) for l in logits] 15 | #W = reshape(w, (1, D)) 16 | #probs = vec(Flux.sigmoid.(W * X)) # 1xN->N vector 17 | labels = [(p > 0.5 ? 
1 : 0) for p in probs] 18 | return probs, labels, logits 19 | end 20 | 21 | function logreg_binary_sample(w, X) 22 | probs = logreg_binary_predict(w, X)[1] 23 | D, N = size(X) 24 | labels = probs .< rand(N) 25 | return labels 26 | end 27 | 28 | function make_test_data_binary(;N=20, D=2) 29 | Random.seed!(1) 30 | X = randn(D, N) # note that examples are stored in the columns 31 | w = randn(D) 32 | y = logreg_binary_sample(w, X) 33 | return w, X, y 34 | end 35 | 36 | function logreg_binary_nll(w, X, y) 37 | D, N = size(X) 38 | p = logreg_binary_predict(w, X)[1] 39 | return -sum(y .* log.(p) + (1 .- y) .* log.(1 .- p)) / N 40 | end 41 | 42 | function logreg_binary_nll_test() 43 | w, X, y = make_test_data_binary() 44 | D, N = size(X) 45 | nll1 = logreg_binary_nll(w, X, y) 46 | probs, labels, logits = logreg_binary_predict(w, X) 47 | nll2 = sum(Flux.logitbinarycrossentropy.(logits, y)) / N 48 | @test isapprox(nll1, nll2) 49 | end 50 | 51 | function logreg_binary_nll_grad(w, X, y) 52 | D, N = size(X) 53 | g = zeros(Float64,D) 54 | for i=1:N 55 | x = X[:,i] 56 | p = Flux.sigmoid(sum(w .* x)) 57 | g[:] += (p - y[i]) .* x 58 | end 59 | return g / N 60 | end 61 | 62 | function logreg_binary_nll_grad_test() 63 | w0, X, y = make_test_data_binary() 64 | g1 = logreg_binary_nll_grad(w0, X, y) 65 | g2 = Flux.gradient(w -> logreg_binary_nll(w, X, y), w0)[1] 66 | @test isapprox(g1, g2) 67 | end 68 | 69 | import Optim 70 | function logreg_binary_fit_bfgs(X, y; w0=nothing, max_iter=100) 71 | D, N = size(X) 72 | if isnothing(w0); w0 = randn(D); end 73 | objective(w) = logreg_binary_nll(w, X, y) 74 | grad(w) = logreg_binary_nll_grad(w, X, y) 75 | solver = Optim.LBFGS(;linesearch = Optim.BackTracking()) 76 | opts = Optim.Options(iterations = max_iter) 77 | result = Optim.optimize(objective, grad, w0, solver, opts; inplace=false) 78 | w_est = result.minimizer 79 | return w_est 80 | end 81 | 82 | include("first-order.jl") 83 | function logreg_binary_fit_adam(X, y; w0=nothing, max_iter=100) 84 | D, N = size(X) 85 | if isnothing(w0); w0 = randn(D); end 86 | objective(w) = logreg_binary_nll(w, X, y) 87 | grad(w) = logreg_binary_nll_grad(w, X, y) 88 | solver = Adam(0.2) 89 | w_est = run_descent_method(solver, objective, grad, w0; max_iter=max_iter) 90 | return w_est 91 | end 92 | 93 | function logreg_binary_fit_flux(X, y; w0=nothing, max_iter=100) 94 | D, N = size(X) 95 | #initW(out, in) = reshape(w0, (1,D)) 96 | #https://github.com/FluxML/Flux.jl/issues/332 97 | #model = Chain(Dense(D, 1; initW = initW), sigmoid) 98 | #model = Chain(Dense(D, 1), sigmoid) 99 | model = Chain(Dense(D, 2), softmax) 100 | #loss(x, y) = Flux.binarycrossentropy(model(x), y) 101 | loss(x, y) = crossentropy(model(x), y) 102 | Y = onehotbatch(y, 0:1) 103 | data = (X,Y) 104 | callback() = @show(loss(X, Y)) 105 | opt = Flux.Optimise.ADAM() 106 | for epoch=1:max_iter 107 | Flux.train!(loss, params(model), data, opt, cb = callback) 108 | end 109 | end 110 | 111 | #= 112 | D = 2; N = 20; 113 | X = randn(D, N) 114 | y = rand([0,1], N) 115 | D, N = size(X) 116 | data = repeated((X,Y),1) 117 | 118 | #model = Chain(Dense(D, 1), sigmoid) # DimensionMismatch("multiplicative identity defined only for square matrices") 119 | #model = Dense(D, 1, sigmoid) #LoadError: no method matching eps(::TrackedArray{…,Array{Float32,2}}) 120 | #loss(x, y) = Flux.binarycrossentropy(model(x), y) 121 | #Y = y 122 | model = Chain(Dense(D, 2), softmax) 123 | #model = Dense(D, 2, softmax) # MethodError: no method matching similar(::Float32) 124 | loss(x, y) = 
crossentropy(model(x), y) 125 | Y = onehotbatch(y, 0:1) 126 | 127 | callback() = @show(loss(X, Y)) 128 | opt = Flux.Optimise.ADAM() 129 | Flux.train!(loss, params(model), data, opt, cb = callback) 130 | =# 131 | 132 | #= 133 | #https://github.com/FluxML/model-zoo/blob/master/vision/mnist/mlp.jl 134 | 135 | imgs = MNIST.images() #60k-vector, imgs[1] is 28x28 matrix of Grayscale 136 | # Stack images into one large batch 137 | XX = hcat(float.(reshape.(imgs, :))...) # 784x60k of float 138 | 139 | labels = MNIST.labels() 140 | Y = onehotbatch(labels, 0:9) # 10k60k OneHotMatrix 141 | =# 142 | 143 | 144 | using GLM, DataFrames 145 | #https://discourse.julialang.org/t/how-to-fit-a-glm-to-all-unnamed-features-of-arbitrary-design-matrix/20490/3 146 | function logreg_binary_fit_glm(X, y) 147 | D, N = size(X) 148 | XT = permutedims(X) 149 | model = fit(GeneralizedLinearModel, XT, y, Bernoulli()) 150 | #df_y = DataFrame(y[:, :], [:yname]) 151 | #df_x_names = [Symbol("x$i") for i in 1:size(Xt)[2]] 152 | #df_x = DataFrame(Xt, df_x_names) 153 | #df = hcat(df_y, df_x) 154 | #lhs = :yname 155 | # include all variables :x1 to :xD but exclude intercept (hence -1) 156 | #rhs = Expr(:call, :+, -1, df_x_names...) 157 | #model = glm(@eval(@formula($(lhs) ~ $(rhs))), df, Bernoulli(), LogitLink()) 158 | return coef(model) 159 | end 160 | 161 | function logreg_binary_fit_test() 162 | wtrue, X, y = make_test_data_binary(;D=2) 163 | D, N = size(X) 164 | Random.seed!(1) 165 | winit = randn(D) 166 | wopt = logreg_binary_fit_glm(X, y) 167 | ll_opt = logreg_binary_nll(wopt, X, y) 168 | println("glm: ll $ll_opt, w $wopt") 169 | expts = Tuple{Any, String}[] 170 | push!(expts, (logreg_binary_fit_bfgs, "bfgs")) 171 | push!(expts, (logreg_binary_fit_adam, "adam")) 172 | push!(expts, (logreg_binary_fit_flux, "flux")) 173 | for (solver, method_name) in expts 174 | west = solver(X, y, w0=winit, max_iter=100) 175 | ll = logreg_binary_nll(west, X, y) 176 | println("$method_name, ll $ll, w $west") 177 | #@test isapprox(west, wopt; atol=0.01) 178 | #@test isapprox(ll, ll_opt; atol=0.1) 179 | end 180 | end 181 | 182 | logreg_binary_nll_test() 183 | logreg_binary_nll_grad_test() 184 | logreg_binary_fit_test() 185 | -------------------------------------------------------------------------------- /FactorGraphs/kalman_smoother_2d.jl: -------------------------------------------------------------------------------- 1 | 2 | # RTS smoothing for a linear Gaussian state space model. 3 | # based on https://github.com/probml/pmtk3/blob/master/demos/kalmanTrackingDemo.m 4 | # We consider the example from "Machine learning: a probabilistic perspective" 5 | # (Murphy, 2012) fig 18.1. 
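# Generative model (set up in make_params below): a 2d tracking problem with
# 4 hidden states (2d position and velocity) and 2 observed states (noisy position),
#   z_t = F z_{t-1} + N(0, Q),   y_t = H z_t + N(0, R),
# with prior N(mu0, V0) on the initial state. Running the sum-product algorithm
# on the resulting factor graph yields the smoothed marginals p(z_t | y_{1:T}),
# whose means and covariance ellipses are plotted at the end of this script.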
6 | 7 | using ForneyLab, LinearAlgebra, Test 8 | import Distributions # "Multivariate" clashes with ForneyLab 9 | #using PDMats 10 | 11 | function make_diagonal(v) 12 | d = length(v) 13 | A = zeros(d,d) 14 | A[diagind(A)] = v 15 | return A 16 | end 17 | 18 | function make_params() 19 | F = [1 0 1 0; 0 1 0 1; 0 0 1 0; 0 0 0 1.0]; 20 | H = [1.0 0 0 0; 0 1 0 0]; 21 | nobs, nhidden = size(H) 22 | #Q = PDiagMat(fill(0.001, nhidden)) 23 | #Q = make_diagonal(fill(0.001, nhidden)) 24 | e = 0.001; Q = [e 0 0 0; 0 e 0 0; 0 0 e 0; 0 0 0 e] 25 | #R = PDiagMat(fill(1.0, nobs)) 26 | #R = make_diagonal(fill(1.0, nobs)) 27 | e = 1.0; R = [e 0; 0 e] 28 | mu0 = [8, 10, 1, 0.0]; 29 | #V0 = PDiagMat(fill(1, nhidden)) 30 | #V0 = make_diagonal(fill(1, nhidden)) 31 | e = 1.0; V0 = [e 0 0 0; 0 e 0 0; 0 0 e 0; 0 0 0 e] 32 | params = (mu0 = mu0, V0 = V0, F = F, H = H, Q = Q, R = R) 33 | return params 34 | end 35 | 36 | function lin_gauss_ssm_sample(T, params) 37 | # z(t+1) = F * z(t-1) + N(0,Q) so F is D*D 38 | # y(t) = H * z(t) + N(0, R) so H is O*D 39 | # Returns zs: H*T, ys: O*T 40 | F = params.F; H = params.H; Q = params.Q; R = params.R; mu0 = params.mu0; V0 = params.V0; 41 | nobs, nhidden = size(H) 42 | zs = Array{Float64}(undef, nhidden, T) 43 | ys = Array{Float64}(undef, nobs, T) 44 | prior_z = Distributions.MvNormal(mu0, V0) 45 | process_noise_dist = Distributions.MvNormal(Q) 46 | obs_noise_dist = Distributions.MvNormal(R) 47 | zs[:,1] = rand(prior_z) 48 | ys[:,1] = H*zs[:,1] + rand(obs_noise_dist) 49 | for t=2:T 50 | zs[:,t] = F*zs[:,t-1] + rand(process_noise_dist) 51 | ys[:,t] = H*zs[:,t] + rand(obs_noise_dist) 52 | end 53 | return zs, ys 54 | end 55 | 56 | function convert_2d_matrix_to_vec_of_vec(M::Array{Float64,2}) 57 | N, T = size(M) 58 | v = Vector{Vector{Float64}}(undef, T) 59 | for i=1:T 60 | v[i] = M[:,i] 61 | end 62 | return v 63 | end 64 | 65 | function make_data(T, params) 66 | nobs, nhidden = size(params.H) 67 | (zs, ys) = lin_gauss_ssm_sample(T, params) 68 | #zs = randn(nhidden, T) 69 | #ys = randn(nobs, T) 70 | # ForneyLab cannot handle matrix observations 71 | # https://github.com/biaslab/ForneyLab.jl/issues/17 72 | y_data = convert_2d_matrix_to_vec_of_vec(ys) 73 | end 74 | 75 | 76 | function make_factor_graph(T, params) 77 | F = params.F; H = params.H; Q = params.Q; R = params.R; mu0 = params.mu0; V0 = params.V0; 78 | g = FactorGraph() 79 | @RV x0 ~ GaussianMeanVariance(mu0, V0) 80 | x = Vector{Variable}(undef, T) 81 | y = Vector{Variable}(undef, T) 82 | x_t_prev = x0 83 | for t = 1:T 84 | #global x_t_prev 85 | mu_x = F * x_t_prev 86 | @RV [id=:x_*t] x[t] ~ GaussianMeanVariance(mu_x, Q) 87 | #@RV x[t] ~ GaussianMeanVariance(mu_x, Q) 88 | #x[t].id = Symbol("x_", t) #:x_t; 89 | mu_y = H * x[t] 90 | @RV y[t] ~ GaussianMeanVariance(mu_y, R) 91 | nobs, nhidden = size(H) 92 | placeholder(y[t], :y, dims=(nobs,), index=t) 93 | x_t_prev = x[t] 94 | end 95 | fg = (x0 = x0, x = x, y = y) 96 | return fg 97 | end 98 | 99 | 100 | function make_fg_inference(fg) 101 | println("generating code") 102 | algo = Meta.parse(sumProductAlgorithm(fg.x)) 103 | eval(algo) # Compile the step! 
function 104 | function infer(y_data) 105 | T = length(y_data) 106 | D = length(y_data[1]) 107 | id = Symbol("y") 108 | data = Dict(id => y_data) 109 | #data = Dict(fg.y => y_data) 110 | println("running forwards backwards") 111 | marginals = step!(data) 112 | println("computing marginals") 113 | m_x = [mean(marginals[:x_*t]) for t = 1:T] # T of D arrays 114 | V_x = [cov(marginals[:x_*t]) for t = 1:T] # T of DxD arrays 115 | return m_x, V_x 116 | end 117 | return infer 118 | end 119 | 120 | # Source: 121 | # https://github.com/QuantEcon/QuantEcon.lectures.code/blob/master/kalman/gaussian_contours.jl 122 | function bivariate_normal(X::Matrix, Y::Matrix, σ_x::Real=1.0, σ_y::Real=1.0, 123 | μ_x::Real=0.0, μ_y::Real=0.0, σ_xy::Real=0.0) 124 | Xμ = X .- μ_x 125 | Yμ = Y .- μ_y 126 | ρ = σ_xy/(σ_x*σ_y) 127 | z = Xμ.^2/σ_x^2 + Yμ.^2/σ_y^2 - 2*ρ.*Xμ.*Yμ/(σ_x*σ_y) 128 | denom = 2π*σ_x*σ_y*sqrt(1-ρ^2) 129 | return exp.(-z/(2*(1-ρ^2))) ./ denom 130 | end 131 | 132 | 133 | # https://github.com/probml/pmtk3/blob/master/matlabTools/graphics/gaussPlot2d.m 134 | function plot_gauss2d(m, C) 135 | U = eigvecs(C) 136 | D = eigvals(C) 137 | N = 100 138 | t = range(0, stop=2*pi, length=N) 139 | xy = zeros(Float64, 2, N) 140 | xy[1,:] = cos.(t) 141 | xy[2,:] = sin.(t) 142 | k = sqrt(6) # approx sqrt(chi2inv(0.95, 2)) = 2.45 143 | k = 0.5 # random hack to make ellipses smaller 144 | w = (k * U * Diagonal(sqrt.(D))) * xy # 2*N 145 | Plots.scatter!([m[1]], [m[2]], marker=:star) 146 | handle = plot!(w[1,:] .+ m[1], w[2,:] .+ m[2]) 147 | return handle 148 | end 149 | 150 | function plot_gauss2d_test() 151 | m = [0.0, 0.0] 152 | #C = randn(2,2); C = C*C'; 153 | C = [1.0 0.0; 0.0 3.0]; 154 | @test isposdef(C) 155 | Plots.scatter([m[1]], [m[2]], marker=:star) 156 | plot_2dgauss(m, C) 157 | end 158 | 159 | 160 | function compute_gauss_marginals(m_x, V_x, keep) 161 | mm_x = [m_x[t][keep] for t=1:T] 162 | VV_x = [V_x[t][keep, keep] for t=1:T] 163 | return mm_x, VV_x 164 | end 165 | 166 | 167 | Random.seed!(1) 168 | T = 5 169 | model = make_params() # cannot use "params" because of "Flux.params" 170 | y_data = make_data(T, model) # T vector of D vectors 171 | ys = hcat(y_data...) # D*T matrix 172 | fg = make_factor_graph(T, model) 173 | infer = make_fg_inference(fg) 174 | mz, Vz = infer(y_data) 175 | mz2, Vz2 = compute_gauss_marginals(mz, Vz, 1:2) 176 | 177 | using Plots; pyplot() 178 | closeall() 179 | plt = scatter(ys[1,:], ys[2,:], label="obs", reuse=false) 180 | xlims!(minimum(ys[1,:])-1, maximum(ys[1,:])+1) 181 | ylims!(minimum(ys[2,:])-1, maximum(ys[2,:])+1) 182 | display(plt) 183 | for t=1:T 184 | plt = plot_gauss2d(mz2[t], Vz2[t]) 185 | #plt = annotate!(mz2[t][1], mz2[t][2], "t=$t"); 186 | display(plt) 187 | end 188 | -------------------------------------------------------------------------------- /Old/rosenbrock-opt-demo.jl: -------------------------------------------------------------------------------- 1 | # Apply various optimization algorithsm to 2d objectives. 2 | 3 | # For the rosenbrock function. 
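# In the form used here (rosenbrock and rosenbrock_grad are assumed to be
# defined in utils.jl, which is included below):
#   f(x)  = (a - x[1])^2 + b*(x[2] - x[1]^2)^2
#   ∇f(x) = [-2*(a - x[1]) - 4*b*x[1]*(x[2] - x[1]^2),  2*b*(x[2] - x[1]^2)]
# The single global minimum is f = 0 at x = [a, a^2] (i.e. [1, 1] for a = 1),
# which is the point labelled "opt" in the contour plots below.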
4 | # See this webpage for a cool interactive demo of various methods 5 | #https://bl.ocks.org/EmilienDupont/f97a3902f4f3a98f350500a3a00371db 6 | 7 | # For the quadratic, we follow the parameters from 8 | #http://people.duke.edu/~ccc14/sta-663-2018/notebooks/S09G_Gradient_Descent_Optimization.html 9 | 10 | 11 | #import Flux 12 | import Optim 13 | #using Test 14 | using LinearAlgebra 15 | 16 | include("utils.jl") 17 | include("first-order.jl") 18 | include("second-order.jl") 19 | 20 | 21 | ##################### 22 | 23 | f(x) = rosenbrock(x, a=1, b=5) 24 | ∇f(x) = rosenbrock_grad(x, a=1, b=5) 25 | ftuple = (x,y) -> f([x,y]) 26 | xdomain = range(-2, stop=2, length=100) 27 | ydomain = range(-2, stop=2, length=100) 28 | x0 = [-1.0, -1.0] 29 | levels=[1,2,3,5,10,20,50,100] 30 | fn_name = "rosen-a1b5" 31 | 32 | #= 33 | # To generate figs 5-5 to 5-7 of A4O we use this pseudo-rosenbrock function 34 | # (note the 4x[2]) 35 | f = x -> (1-x[1])^2 + 100*(4x[2] - x[1]^2)^2 36 | ∇f = x -> [2*(200x[1]^3 - 800x[1]*x[2] + x[1] - 1), -800*(x[1]^2 - 4x[2])] 37 | #f = rosenbrock 38 | #∇f = rosenbrock_grad 39 | xdomain = range(-3, stop=2, length=100) 40 | ydomain = range(-0.5, stop=2, length=100) 41 | x0 = [-2,1.5] 42 | levels=[2,10,50,200,500] 43 | fn_name = "rosen2-a1b100" 44 | =# 45 | 46 | #= 47 | f(x) = rosenbrock(x, a=1, b=100) 48 | ∇f(x) = rosenbrock_grad(x, a=1, b=100) 49 | xdomain = range(-4, stop=4, length=200) 50 | ydomain = range(-4, stop=4, length=200) 51 | x0 = [4.0, -4.0] 52 | levels=range(1,stop=10,length=10) .^ 5 53 | fn_name = "rosen-a1b100" 54 | =# 55 | 56 | #= 57 | # Ill-conditioned quadratic 58 | f(x) = x[1]^2 + 100*x[2]^2 59 | ∇f(x) = [2*x[1], 200*x[2]] 60 | xdomain = range(-1, stop=1, length=100) 61 | ydomain = xdomain 62 | x0 = [-1.0, -1.0] 63 | levels = [0.1,1,2,4,9, 16, 25, 36, 49, 64, 81, 100] 64 | fn_name = "quad" 65 | =# 66 | 67 | ######################### 68 | 69 | N = 50 70 | expts = Tuple{DescentMethod, String}[] 71 | 72 | #values chosen for rosenbrock(a=1,b=5) 73 | #push!(expts, (GradientDescentLineSearch(), "GD-linesearch")) 74 | #push!(expts, (GradientDescent(0.01), "GD-0.01")) 75 | #push!(expts, (GradientDescent(0.05), "GD-0.05")) 76 | push!(expts, (Momentum(0.005), "momentum")) 77 | push!(expts, (NesterovMomentum(0.005), "nesterov")) 78 | push!(expts, (Adagrad(0.2), "Adagrad")) 79 | push!(expts, (RMSProp(0.2), "RMSProp")) 80 | push!(expts, (Adadelta(), "AdaDelta")) 81 | push!(expts, (Adam(0.2), "Adam")) 82 | push!(expts, (HyperGradientDescent(0.05, 2e-5), "hyper-GD")) 83 | push!(expts, (BFGS(), "BFGS")) 84 | push!(expts, (LBFGS(5), "LBFGS")) 85 | push!(expts, (DFP(), "DFP")) 86 | 87 | 88 | #= 89 | # values chosen for quadratic 90 | push!(expts, (GradientDescent(1e-3), "gradient descent (1e-3)")) 91 | push!(expts, (GradientDescent(1e-2), "gradient descent (1e-2)")) 92 | push!(expts, (GradientDescentLineSearch(), "gradient descent + linesearch")) 93 | push!(expts, (Momentum(1e-2), "momentum")) 94 | #push!(expts, (NesterovMomentum(0.0002, 0.92, zeros(2)), "Nesterov momentum")) 95 | #push!(expts, (HyperGradientDescent(0.0004, 8e-13, NaN, zeros(2)), "hypermomentum")) 96 | #push!(expts, (HyperNesterovMomentum(0.00023, 1e-12, 0.93, zeros(2), NaN, zeros(2)), "hyper-Nesterov")) 97 | =# 98 | 99 | 100 | #= 101 | # values chosen for rosenbrock(a=1, b=100) 102 | push!(expts, (GradientDescent(1e-4), "gradient descent (0.0001)")) 103 | push!(expts, (GradientDescent(3e-4), "gradient descent (0.0003)")) 104 | push!(expts, (GradientDescentLineSearch(), "gradient descent + linesearch")) 105 | 
push!(expts, (Momentum(1e-5), "momentum")) 106 | #push!(expts, (NesterovMomentum(0.0002, 0.92, zeros(2)), "Nesterov momentum")) 107 | #push!(expts, (HyperGradientDescent(0.0004, 8e-13, NaN, zeros(2)), "hypermomentum")) 108 | #push!(expts, (HyperNesterovMomentum(0.00023, 1e-12, 0.93, zeros(2), NaN, zeros(2)), "hyper-Nesterov")) 109 | =# 110 | 111 | #= 112 | # values chosen for pseudo-rosenbrock 113 | push!(expts, (GradientDescent(0.0003), "gradient descent (0.0003)")) 114 | push!(expts, (GradientDescent(0.003), "gradient descent (0.003)")) 115 | push!(expts, (GradientDescentLineSearch(), "gradient descent + linesearch")) 116 | #push!(expts, (Momentum(0.0003, 0.9, zeros(2)), "momentum")) 117 | #push!(expts, (NesterovMomentum(0.0002, 0.92, zeros(2)), "Nesterov momentum")) 118 | #push!(expts, (HyperGradientDescent(0.0004, 8e-13, NaN, zeros(2)), "hypermomentum")) 119 | #push!(expts, (HyperNesterovMomentum(0.00023, 1e-12, 0.93, zeros(2), NaN, zeros(2)), "hyper-Nesterov")) 120 | =# 121 | 122 | using Plots 123 | pyplot() 124 | for (M, method_name) in expts 125 | println("using $method_name") 126 | plt = contour(xdomain, ydomain, (x,y)->f([x,y]), levels=levels, reuse=false) 127 | scatter!([x0[1]], [x0[2]], marker=:circle, markercolor=:black, 128 | markersize=5, label="start") 129 | scatter!([1.0], [1.0], marker=:star, markercolor=:red, 130 | markersize=5, label="opt") 131 | 132 | pts = [x0] 133 | ftrace = [f(x0)] 134 | function cb(M, fn, grad, x, iter) 135 | y = fn(x) 136 | g = grad(x) 137 | gnorm = norm(g) 138 | #println("$name, iter=$iter, f=$y, gnorm=$gnorm") 139 | if abs(y)>1e10 || abs(gnorm)>1e10 || abs(gnorm) < 1e-2; return true; end 140 | push!(pts, x) 141 | push!(ftrace, y) 142 | return false 143 | end 144 | xfinal = run_descent_method(M, f, ∇f, x0; max_iter=N, callback=cb) 145 | xtrace = hcat(pts...) # 2xN 146 | #println(ftrace) 147 | plot!(xtrace[1,:], xtrace[2,:], label=method_name, linewidth=3, linecolor=:red) 148 | display(plt) 149 | savefig(plt, "Figures/$fn_name-$method_name-x.pdf") 150 | plt = plot(ftrace, label=method_name; yaxis=:log) 151 | display(plt) 152 | savefig(plt, "Figures/$fn_name-$method_name-f.pdf") 153 | end 154 | 155 | 156 | #Optiml.jl has a different interface 157 | # See https://julianlsolvers.github.io/Optim.jl/stable/#user/minimization/ 158 | function apply_LBFGS(f, ∇f, x0, N) 159 | solver = Optim.LBFGS(;linesearch = Optim.BackTracking()) 160 | opts = Optim.Options(iterations = N, store_trace=true, extended_trace=true) 161 | result = Optim.optimize(rosenbrock, rosenbrock_grad, x0, solver, opts; inplace=false) 162 | xopt = result.minimizer 163 | xtrace = Optim.x_trace(result) # N-vector of D-vectors 164 | N = length(xtrace) 165 | D = length(xtrace[1]) 166 | xs = hcat(xtrace...) 
#DxN 167 | ftrace = Optim.f_trace(result) 168 | return xopt, xs, ftrace 169 | end 170 | 171 | xopt, xtrace, ftrace = apply_LBFGS(f, ∇f, x0, N) 172 | method_name = "LBFGS-Optim" 173 | plt = contour(xdomain, ydomain, (x,y)->f([x,y]), levels=levels, reuse=false) 174 | scatter!([x0[1]], [x0[2]], marker=:circle, markercolor=:black, 175 | markersize=5, label="start") 176 | scatter!([1.0], [1.0], marker=:star, markercolor=:red, 177 | markersize=5, label="opt") 178 | plot!(xtrace[1,:], xtrace[2,:], label=method_name, linewidth=3, linecolor=:red) 179 | display(plt) 180 | savefig(plt, "Figures/$fn_name-$method_name-x.pdf") 181 | plt = plot(ftrace, label=method_name; yaxis=:log) 182 | display(plt) 183 | savefig(plt, "Figures/$fn_name-$method_name-f.pdf") 184 | -------------------------------------------------------------------------------- /Old/dummy.jl: -------------------------------------------------------------------------------- 1 | # scratch file 2 | 3 | # Latent dimensionality, # hidden units. 4 | Dz, Dh = 5, 500 5 | 6 | # Components of recognition model / "encoder" MLP. 7 | A, μ, logσ = Dense(28^2, Dh, tanh), Dense(Dh, Dz), Dense(Dh, Dz) 8 | g(X) = (h = A(X); (μ(h), logσ(h))) 9 | z(μ, logσ) = μ + exp(logσ) * randn(Float32) 10 | 11 | # Generative model / "decoder" MLP. 12 | f = Chain(Dense(Dz, Dh, tanh), Dense(Dh, 28^2, σ)) 13 | 14 | 15 | 16 | using Distributed 17 | # parallel loops: 18 | 19 | Sys.CPU_THREADS 20 | 21 | # parallel maps: 22 | using LinearAlgebra 23 | function rank_marray() 24 | marr = [rand(1000,1000) for i=1:10] 25 | println(map(rank, marr)) 26 | end 27 | @benchmark rank_marray() 28 | 29 | # memory estimate: 158.40 MiB 30 | # allocs estimate: 202 31 | # -------------- 32 | # minimum time: 5.659 s (3.19% GC) 33 | # median time: 5.659 s (3.19% GC) 34 | # mean time: 5.659 s (3.19% GC) 35 | # maximum time: 5.659 s (3.19% GC) 36 | # -------------- 37 | # samples: 1 38 | # evals/sample: 1 39 | # 40 | function prank_marray() 41 | marr = [rand(1000,1000) for i=1:10] 42 | println(pmap(rank, marr)) 43 | end 44 | @benchmark prank_marray() 45 | # BenchmarkTools.Trial: 46 | # memory estimate: 76.34 MiB 47 | # allocs estimate: 1128 48 | # -------------- 49 | # minimum time: 3.517 s (0.82% GC) 50 | # median time: 3.643 s (2.44% GC) 51 | # mean time: 3.643 s (2.44% GC) 52 | # maximum time: 3.769 s (3.95% GC) 53 | # -------------- 54 | # samples: 2 55 | # evals/sample: 1 56 | # 57 | 58 | using BenchmarkTools 59 | 60 | fmap(x) = map(x -> 2x, x) 61 | fcomprehension(x) = [2x for x in x] 62 | fdot(x) = 2 .* x 63 | function floop(x) 64 | y = similar(x) 65 | for i in eachindex(x) 66 | y[i] = 2*x[i] 67 | end 68 | return y 69 | end 70 | function floopopt(x) 71 | y = similar(x) 72 | @simd for i in eachindex(x) 73 | @inbounds y[i] = 2*x[i] 74 | end 75 | return y 76 | end 77 | 78 | x = rand(1000) 79 | @btime fmap($x) 80 | @btime fcomprehension($x) 81 | @btime fdot($x) 82 | @btime floop($x) 83 | @btime floopopt($x); 84 | 85 | 86 | function buffon(n) 87 | hit = 0 88 | for i = 1:n 89 | mp = rand() 90 | phi = (rand() * pi) - pi / 2 # angle at which needle falls 91 | xright = mp + cos(phi)/2 # x-location of needle 92 | xleft = mp - cos(phi)/2 93 | # if xright >= 1 || xleft <= 0 94 | # hit += 1 95 | # end 96 | # Does needle cross either x == 0 or x == 1? 97 | p = (xright >= 1 || xleft <= 0) ? 
1 : 0 98 | hit += p 99 | end 100 | miss = n - hit 101 | piapprox = n / hit * 2 102 | end 103 | 104 | @time buffon(1e8) 105 | # 3.399998 seconds (5 allocations: 176 bytes) 106 | #3.1413701707991235 107 | 108 | function buffon_par(n) 109 | hit = @distributed (+) for i = 1:n 110 | mp = rand() 111 | phi = (rand() * pi) - pi / 2 112 | xright = mp + cos(phi)/2 113 | xleft = mp - cos(phi)/2 114 | (xright >= 1 || xleft <= 0) ? 1 : 0 115 | end 116 | miss = n - hit 117 | piapprox = n / hit * 2 118 | end 119 | 120 | 121 | @time buffon_par(1e8) 122 | #1.536845 seconds (2.61 k allocations: 137.891 KiB) 123 | #3.141489580637215 124 | 125 | 126 | #= 127 | # Starter problems from 128 | # http://ucidatascienceinitiative.github.io/IntroToJulia/Html/BasicProblems 129 | 130 | using Statistics 131 | function binrv(n, p) 132 | nheads = 0 133 | for i=1:n 134 | u = rand() 135 | if u < p; nheads += 1; end 136 | end 137 | return nheads 138 | end 139 | ntrials = 1000 140 | counts = zeros(ntrials) 141 | n = 10; p = 0.5 142 | for i=1:ntrials 143 | counts[i] = binrv(n, p) 144 | end 145 | c = counts ./ n 146 | println("mean of $ntrials trials with pheads=$p is $(mean(c))\n") 147 | @assert isapprox(mean(c), p, atol=0.1) 148 | 149 | 150 | N = 5 151 | using SparseArrays 152 | A = spzeros(N,N) 153 | for i=1:N 154 | j=i; A[i,j]=-2 155 | if i>1; j=i-1; A[i,j]=1; end 156 | if iDistributions.pdf(dist,[x,y]), levels = [s*p]) 237 | if ~isnothing(txt); annotate!(m_x, m_y, txt); end 238 | return plt 239 | end 240 | =# 241 | 242 | 243 | 244 | #= 245 | # StatsModels.jl defines the Formula type and @formula macro 246 | # https://github.com/JuliaStats/StatsModels.jl/blob/4701a1bd221f6281371f254f69c2f95c19e02a92/src/formula.jl 247 | function make_formula_all_features(df, target) 248 | col_symbols = Set(names(df)) 249 | feature_symbols = setdiff(col_symbols, Set([target])) 250 | feature_strings = [string(s) for s in feature_symbols] 251 | all_features = join(feature_strings, " + " ) 252 | formula = string(target) * " ~ " * all_features 253 | return formula 254 | end 255 | 256 | function make_formula_test() 257 | n = 10 258 | df = DataFrame(X1=randn(n), X2=randn(n), Y=randn(n)) 259 | ols = lm(@formula(Y ~ X1 + X2), df) 260 | formula = make_formula_all_features(df, :Y) 261 | f_expr = Meta.parse(formula) 262 | ols2 = lm(@formula(f_expr), df) 263 | end 264 | =# 265 | -------------------------------------------------------------------------------- /Old/kalman_qe.jl: -------------------------------------------------------------------------------- 1 | # Slightly modified from 2 | # https://github.com/QuantEcon/QuantEcon.jl/blob/master/src/kalman.jl 3 | # To map the notation used in MLAPP ch. 18, note that 4 | # - the book uses C, the code uses G for observation model 5 | # - the book uses z, the code uses x for hidden state 6 | # - the book uses mu_{t|t-1}, the code uses cur_x_hat 7 | # - the book uses Sigma_t|t-1}, the code uses cur_sigma 8 | # ie. the state of the KF object is the posterior predictive distribution 9 | 10 | #= 11 | Implements the Kalman filter for a linear Gaussian state space model. 12 | @author : Spencer Lyon 13 | @date: 2014-07-29 14 | References 15 | ---------- 16 | https://lectures.quantecon.org/jl/kalman.html 17 | TODO: Do docstrings here after implementing LinerStateSpace 18 | =# 19 | 20 | mutable struct Kalman 21 | A # transition model 22 | G # observation model 23 | Q # transition noise 24 | R # observation noise 25 | k # num. hidden states 26 | n # num. 
observed states 27 | cur_x_hat # posterior predictive mean 28 | cur_sigma # posterior predictive covariance 29 | end 30 | 31 | 32 | # Initializes current mean and cov to zeros 33 | function Kalman(A, G, Q, R) 34 | k = size(G, 1) 35 | n = size(G, 2) 36 | xhat = n == 1 ? zero(eltype(A)) : zeros(n) 37 | Sigma = n == 1 ? zero(eltype(A)) : zeros(n, n) 38 | return Kalman(A, G, Q, R, k, n, xhat, Sigma) 39 | end 40 | 41 | 42 | function set_state!(k::Kalman, x_hat, Sigma) 43 | k.cur_x_hat = x_hat 44 | k.cur_sigma = Sigma 45 | Nothing 46 | end 47 | 48 | #= 49 | """ 50 | Updates the moments (`cur_x_hat`, `cur_sigma`) of the time ``t`` prior to the 51 | time ``t`` filtering distribution, using current measurement ``y_t``. 52 | The updates are according to 53 | ```math 54 | \hat{x}^F = \hat{x} + \Sigma G' (G \Sigma G' + R)^{-1} 55 | (y - G \hat{x}) \\ 56 | \Sigma^F = \Sigma - \Sigma G' (G \Sigma G' + R)^{-1} G 57 | \Sigma 58 | ``` 59 | #### Arguments 60 | - `k::Kalman` An instance of the Kalman filter 61 | - `y` The current measurement 62 | """ 63 | =# 64 | function prior_to_filtered!(k::Kalman, y) 65 | # simplify notation 66 | G, R = k.G, k.R 67 | x_hat, Sigma = k.cur_x_hat, k.cur_sigma 68 | 69 | # and then update 70 | if k.k > 1 71 | reshape(y, k.k, 1) 72 | end 73 | A = Sigma * G' 74 | B = G * Sigma * G' + R 75 | M = A / B 76 | k.cur_x_hat = x_hat + M * (y .- G * x_hat) 77 | k.cur_sigma = Sigma - M * G * Sigma 78 | Nothing 79 | end 80 | 81 | """ 82 | Updates the moments of the time ``t`` filtering distribution to the 83 | moments of the predictive distribution, which becomes the time 84 | ``t+1`` prior 85 | #### Arguments 86 | - `k::Kalman` An instance of the Kalman filter 87 | """ 88 | function filtered_to_forecast!(k::Kalman) 89 | # simplify notation 90 | A, Q = k.A, k.Q 91 | x_hat, Sigma = k.cur_x_hat, k.cur_sigma 92 | 93 | # and then update 94 | k.cur_x_hat = A * x_hat 95 | k.cur_sigma = A * Sigma * A' + Q 96 | Nothing 97 | end 98 | 99 | """ 100 | Updates `cur_x_hat` and `cur_sigma` given array `y` of length `k`. The full 101 | update, from one period to the next 102 | #### Arguments 103 | - `k::Kalman` An instance of the Kalman filter 104 | - `y` An array representing the current measurement 105 | """ 106 | function update!(k::Kalman, y) 107 | prior_to_filtered!(k, y) 108 | filtered_to_forecast!(k) 109 | Nothing 110 | end 111 | 112 | 113 | function stationary_values(k::Kalman) 114 | # simplify notation 115 | 116 | A, Q, G, R = k.A, k.Q, k.G, k.R 117 | 118 | # solve Riccati equation, obtain Kalman gain 119 | Sigma_inf = solve_discrete_riccati(A', G', Q, R) 120 | K_inf = A * Sigma_inf * G' * inv(G * Sigma_inf * G' .+ R) 121 | return Sigma_inf, K_inf 122 | end 123 | 124 | """ 125 | computes log-likelihood of period ``t`` 126 | ##### Arguments 127 | - `kn::Kalman`: `Kalman` specifying the model. Current values must be the 128 | forecast for period ``t`` observation conditional on ``t-1`` 129 | observation. 130 | - `y::AbstractVector`: Respondentbservations at period ``t`` 131 | ##### Returns 132 | - `logL::Real`: log-likelihood of observations at period ``t`` 133 | """ 134 | function log_likelihood(k::Kalman, y::AbstractVector) 135 | eta = y - k.G*k.cur_x_hat # forecast error 136 | P = k.G*k.cur_sigma*k.G' + k.R # covariance matrix of forecast error 137 | logL = - (length(y)*log(2pi) + logdet(P) .+ eta'/P*eta)[1]/2 138 | return logL 139 | end 140 | 141 | """ 142 | computes log-likelihood of entire observations 143 | ##### Arguments 144 | - `kn::Kalman`: `Kalman` specifying the model. 
Initial value must be the prior 145 | for t=1 period observation, i.e. ``x_{1|0}``. 146 | - `y::AbstractMatrix`: `n x T` matrix of observed data. 147 | `n` is the number of observed variables in one period. 148 | Each column is a vector of observations at each period. 149 | ##### Returns 150 | - `logL::Real`: log-likelihood of all observations 151 | """ 152 | function compute_loglikelihood(kn::Kalman, y::AbstractMatrix) 153 | T = size(y, 2) 154 | logL = 0 155 | # forecast and update 156 | for t in 1:T 157 | logL = logL + log_likelihood(kn, y[:, t]) 158 | update!(kn, y[:, t]) 159 | end 160 | return logL 161 | end 162 | 163 | """ 164 | ##### Arguments 165 | - `kn::Kalman`: `Kalman` specifying the model. Initial value must be the prior 166 | for t=1 period observation, i.e. ``x_{1|0}``. 167 | - `y::AbstractMatrix`: `n x T` matrix of observed data. 168 | `n` is the number of observed variables in one period. 169 | Each column is a vector of observations at each period. 170 | ##### Returns 171 | - `x_smoothed::AbstractMatrix`: `k x T` matrix of smoothed mean of states. 172 | `k` is the number of states. 173 | - `logL::Real`: log-likelihood of all observations 174 | - `sigma_smoothed::AbstractArray` `k x k x T` array of smoothed covariance matrix of states. 175 | """ 176 | function smooth(kn::Kalman, y::AbstractMatrix) 177 | G, R = kn.G, kn.R 178 | 179 | T = size(y, 2) 180 | n = kn.n 181 | x_filtered = Matrix{Float64}(undef, n, T) 182 | sigma_filtered = Array{Float64}(undef, n, n, T) 183 | sigma_forecast = Array{Float64}(undef, n, n, T) 184 | logL = 0 185 | # forecast and update 186 | for t in 1:T 187 | logL = logL + log_likelihood(kn, y[:, t]) 188 | prior_to_filtered!(kn, y[:, t]) 189 | x_filtered[:, t], sigma_filtered[:, :, t] = kn.cur_x_hat, kn.cur_sigma 190 | filtered_to_forecast!(kn) 191 | sigma_forecast[:, :, t] = kn.cur_sigma 192 | end 193 | # smoothing 194 | x_smoothed = copy(x_filtered) 195 | sigma_smoothed = copy(sigma_filtered) 196 | for t in (T-1):-1:1 197 | x_smoothed[:, t], sigma_smoothed[:, :, t] = 198 | go_backward(kn, x_filtered[:, t], sigma_filtered[:, :, t], 199 | sigma_forecast[:, :, t], x_smoothed[:, t+1], 200 | sigma_smoothed[:, :, t+1]) 201 | end 202 | 203 | return x_smoothed, logL, sigma_smoothed 204 | end 205 | 206 | """ 207 | ##### Arguments 208 | - `kn::Kalman`: `Kalman` specifying the model. 
209 | - `x_fi::Vector`: filtered mean of state for period ``t`` 210 | - `sigma_fi::Matrix`: filtered covariance matrix of state for period ``t`` 211 | - `sigma_fo::Matrix`: forecast of covariance matrix of state for period ``t+1`` 212 | conditional on period ``t`` observations 213 | - `x_s1::Vector`: smoothed mean of state for period ``t+1`` 214 | - `sigma_s1::Matrix`: smoothed covariance of state for period ``t+1`` 215 | ##### Returns 216 | - `x_s1::Vector`: smoothed mean of state for period ``t`` 217 | - `sigma_s1::Matrix`: smoothed covariance of state for period ``t`` 218 | """ 219 | function go_backward(k::Kalman, x_fi::Vector, 220 | sigma_fi::Matrix, sigma_fo::Matrix, 221 | x_s1::Vector, sigma_s1::Matrix) 222 | A = k.A 223 | temp = sigma_fi*A'/sigma_fo 224 | x_s = x_fi + temp*(x_s1-A*x_fi) 225 | sigma_s = sigma_fi + temp*(sigma_s1-sigma_fo)*temp' 226 | return x_s, sigma_s 227 | end 228 | -------------------------------------------------------------------------------- /Old/first-order.jl: -------------------------------------------------------------------------------- 1 | # Modified from 2 | #https://github.com/sisl/algforopt-notebooks/blob/master/first-order.ipynb 3 | 4 | # See also 5 | # https://github.com/FluxML/Flux.jl/blob/master/src/optimise/optimisers.jl 6 | 7 | #include("support_code.jl"); 8 | #using Plots 9 | #using Vec 10 | include("utils.jl") 11 | 12 | # A4O p36 13 | function bracket_minimum(f, x=0; s=1e-2, k=2.0) 14 | a, ya = x, f(x) 15 | b, yb = a + s, f(a + s) 16 | if yb > ya 17 | a, b = b, a 18 | ya, yb = yb, ya 19 | s = -s 20 | end 21 | while true 22 | c, yc = b + s, f(b + s) 23 | if yc > yb 24 | return a < c ? (a, c) : (c, a) 25 | end 26 | a, ya, b, yb = b, yb, c, yc 27 | s *= k 28 | end 29 | end 30 | 31 | # A4O p92 32 | # 1d optimization based on Newton's method 33 | function secant_method(df, x1, x2, ϵ) 34 | df1 = df(x1) 35 | delta = Inf 36 | while abs(delta) > ϵ 37 | df2 = df(x2) 38 | delta = (x2 - x1)/(df2 - df1)*df2 39 | x1, x2, df1 = x2, x2 - delta, df2 40 | end 41 | x2 42 | end 43 | 44 | # A4O p54 45 | function line_search_secant(f, dfn, x, d) 46 | objective = α -> f(x + α*d) 47 | a, b = bracket_minimum(objective) 48 | #α = minimize(objective, a, b) 49 | f1d = a -> begin 50 | xnew = x + a*d 51 | g2 = dfn(xnew) 52 | v1 = dot(g2, d) / norm(d) 53 | return v1 54 | end 55 | α = secant_method(f1d, a, b, 0.0001) 56 | #println("ls secant chose $α") 57 | return α 58 | end 59 | 60 | 61 | 62 | # A40 p56 63 | using LinearAlgebra 64 | function line_search_backtracking(f, ∇f, x, d; α=1.0, p=0.5, β=1e-4, max_iter=50) 65 | y, g = f(x), ∇f(x) 66 | iter = 1 67 | while (iter <= max_iter) && (f(x + α*d) > y + β*α*(g⋅d)) 68 | α *= p 69 | #println("line search iter $iter, $α") 70 | iter += 1 71 | end 72 | #println("backtracking ls chose $α") 73 | return α 74 | end 75 | 76 | import LineSearches 77 | #https://julianlsolvers.github.io/LineSearches.jl/latest/examples/generated/customoptimizer.html 78 | function line_search_backtracking_optim(f, g, x, d) 79 | linesearch_fn = LineSearches.BackTracking(order=3) 80 | #linesearch_fn = LineSearches.HagerZhang() 81 | s = d 82 | ϕ(α) = f(x .+ α.*s) 83 | function dϕ(α) 84 | gvec = g(x .+ α.*s) 85 | return dot(gvec, s) 86 | end 87 | function ϕdϕ(α) 88 | phi = f(x .+ α.*s) 89 | gvec = g(x .+ α.*s) 90 | dphi = dot(gvec, s) 91 | return (phi, dphi) 92 | end 93 | fx = f(x) 94 | gvec = g(x) 95 | dϕ_0 = dot(s, gvec) 96 | α, fx = linesearch_fn(ϕ, dϕ, ϕdϕ, 1.0, fx, dϕ_0) 97 | #println("optim ls chose $α") 98 | return α 99 | end 100 | 101 | 102 | 
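# A minimal sketch of how the helpers above fit together, assuming this file
# has been included (f1, f, x0 and the 1e-6 tolerance are placeholder choices
# for illustration): bracket a 1-d minimum, refine it with the secant method,
# then take one backtracking line-search step along the steepest-descent
# direction.
#=
f1(a) = (a - 2)^2                      # 1-d objective, minimum at a = 2
df1(a) = 2 * (a - 2)
a_lo, a_hi = bracket_minimum(f1)       # interval containing the minimizer
a_star = secant_method(df1, a_lo, a_hi, 1e-6)    # ≈ 2.0

f(x) = sum(abs2, x)                    # multivariate objective, gradient 2x
∇f(x) = 2 .* x
x0 = [1.0, -2.0]
d = -∇f(x0)                            # steepest-descent direction
α = line_search_backtracking(f, ∇f, x0, d)       # step with sufficient decrease
x1 = x0 + α*d                          # f(x1) < f(x0)
=#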
################# 103 | 104 | 105 | abstract type DescentMethod end 106 | 107 | struct GradientDescent <: DescentMethod 108 | α::Real # learning rate 109 | end 110 | function init!(M::GradientDescent, f, ∇f, x) 111 | return M 112 | end 113 | function step!(M::GradientDescent, f, ∇f, x) 114 | α, g = M.α, ∇f(x) 115 | return x - α*g 116 | end 117 | 118 | 119 | struct GradientDescentLineSearch <: DescentMethod 120 | end 121 | 122 | function init!(M::GradientDescentLineSearch, f, ∇f, x) 123 | return M 124 | end 125 | function step!(M::GradientDescentLineSearch, f, ∇f, x) 126 | d = -normalize(∇f(x)) 127 | α = line_search_backtracking(f, ∇f, x, d) 128 | return x + α*d 129 | end 130 | 131 | struct GradientDescentLineSearchExact <: DescentMethod 132 | end 133 | 134 | function init!(M::GradientDescentLineSearchExact, f, ∇f, x) 135 | return M 136 | end 137 | function step!(M::GradientDescentLineSearchExact, f, ∇f, x) 138 | d = -normalize(∇f(x)) 139 | α = line_search_secant(f, ∇f, x, d) 140 | return x + α*d 141 | end 142 | 143 | mutable struct Momentum <: DescentMethod 144 | α::Real # learning rate 145 | β::Real # momentum decay 146 | v::Vector{Real} # momentum 147 | end 148 | # default constructor 149 | Momentum(α) = Momentum(α, 0.9, Vector{Real}()) 150 | 151 | function init!(M::Momentum, f, ∇f, x) 152 | M.v = zeros(length(x)) 153 | return M 154 | end 155 | function step!(M::Momentum, f, ∇f, x) 156 | α, β, v, g = M.α, M.β, M.v, ∇f(x) 157 | v[:] = β*v - α*g 158 | return x + v 159 | end 160 | 161 | mutable struct NesterovMomentum <: DescentMethod 162 | α::Real # learning rate 163 | β::Real # momentum decay 164 | v::Vector{Real} # momentum 165 | end 166 | # default constructor 167 | NesterovMomentum(α) = NesterovMomentum(α, 0.9, Vector{Real}()) 168 | 169 | function init!(M::NesterovMomentum, f, ∇f, x) 170 | M.v = zeros(length(x)) 171 | return M 172 | end 173 | function step!(M::NesterovMomentum, f, ∇f, x) 174 | α, β, v = M.α, M.β, M.v 175 | v[:] = β*v - α*∇f(x + β*v) 176 | return x + v 177 | end 178 | 179 | 180 | mutable struct Adagrad <: DescentMethod 181 | α::Real # learning rate 182 | ϵ::Real # small value 183 | s::Vector{Real} # sum of squared gradient 184 | end 185 | # default constructor 186 | Adagrad(α) = Adagrad(α, 1e-8, Vector{Real}()) 187 | 188 | function init!(M::Adagrad, f, ∇f, x) 189 | M.s = zeros(length(x)) 190 | return M 191 | end 192 | 193 | function step!(M::Adagrad, f, ∇f, x) 194 | α, ϵ, s, g = M.α, M.ϵ, M.s, ∇f(x) 195 | s[:] += g.*g 196 | return x - α*g ./ (sqrt.(s) .+ ϵ) 197 | end 198 | 199 | mutable struct RMSProp <: DescentMethod 200 | α::Real # learning rate 201 | γ::Real # decay 202 | ϵ::Real # small value 203 | s::Vector{Real} # sum of squared gradient 204 | end 205 | #default constructor 206 | RMSProp(α) = RMSProp(α, 0.9, 1e-8, Vector{Real}()) 207 | 208 | function init!(M::RMSProp, f, ∇f, x) 209 | M.s = zeros(length(x)) 210 | return M 211 | end 212 | function step!(M::RMSProp, f, ∇f, x) 213 | α, γ, ϵ, s, g = M.α, M.γ, M.ϵ, M.s, ∇f(x) 214 | s[:] = γ*s + (1-γ)*(g.*g) 215 | return x - α*g ./ (sqrt.(s) .+ ϵ) 216 | end 217 | 218 | mutable struct Adadelta <: DescentMethod 219 | γs::Real # gradient decay 220 | γx::Real # update decay 221 | ϵ::Real # small value 222 | s::Vector{Real} # sum of squared gradients 223 | u::Vector{Real} # sum of squared updates 224 | end 225 | # default constructor 226 | Adadelta() = Adadelta(0.9, 0.9, 1e-8, Vector{Real}(), Vector{Real}()) 227 | 228 | function init!(M::Adadelta, f, ∇f, x) 229 | M.s = zeros(length(x)) 230 | M.u = zeros(length(x)) 231 | 
return M 232 | end 233 | function step!(M::Adadelta, f, ∇f, x) 234 | γs, γx, ϵ, s, u, g = M.γs, M.γx, M.ϵ, M.s, M.u, ∇f(x) 235 | s[:] = γs*s + (1-γs)*g.*g 236 | Δx = - (sqrt.(u) .+ ϵ) ./ (sqrt.(s) .+ ϵ) .* g 237 | u[:] = γx*u + (1-γx)*Δx.*Δx 238 | return x + Δx 239 | end 240 | 241 | mutable struct Adam <: DescentMethod 242 | α::Real # learning rate 243 | γv::Real # decay 244 | γs::Real # decay 245 | ϵ::Real # small value 246 | k::Int # step counter 247 | v::Vector{Real} # 1st moment estimate 248 | s::Vector{Real} # 2nd moment estimate 249 | end 250 | # default constructor 251 | Adam(α) = Adam(α, 0.9, 0.999, 1e-8, 0, Vector{Real}(), Vector{Real}()) 252 | 253 | function init!(M::Adam, f, ∇f, x) 254 | M.k = 0 255 | M.v = zeros(length(x)) 256 | M.s = zeros(length(x)) 257 | return M 258 | end 259 | function step!(M::Adam, f, ∇f, x) 260 | α, γv, γs, ϵ, k = M.α, M.γv, M.γs, M.ϵ, M.k 261 | s, v, g = M.s, M.v, ∇f(x) 262 | v[:] = γv*v + (1-γv)*g 263 | s[:] = γs*s + (1-γs)*g.*g 264 | M.k = k += 1 265 | v_hat = v ./ (1 - γv^k) 266 | s_hat = s ./ (1 - γs^k) 267 | return x - α*v_hat ./ (sqrt.(s_hat) .+ ϵ) 268 | end 269 | 270 | mutable struct HyperGradientDescent <: DescentMethod 271 | α0::Real # initial learning rate 272 | μ::Real # learning rate of the learning rate 273 | α::Real # current learning rate 274 | g_prev::Vector{Real} # previous gradient 275 | end 276 | # default constructor 277 | HyperGradientDescent(α0, μ) = HyperGradientDescent(α0, μ, NaN, Vector{Real}()) 278 | 279 | function init!(M::HyperGradientDescent, f, ∇f, x) 280 | M.α = M.α0 281 | M.g_prev = zeros(length(x)) 282 | return M 283 | end 284 | function step!(M::HyperGradientDescent, f, ∇f, x) 285 | α, μ, g, g_prev = M.α, M.μ, ∇f(x), M.g_prev 286 | α = α + μ*(g⋅g_prev) 287 | M.g_prev, M.α = g, α 288 | return x - α*g 289 | end 290 | 291 | 292 | ##### 293 | 294 | function run_descent_method(M::DescentMethod, f, ∇f, x0; 295 | max_iter = 100, 296 | callback = (M, f, ∇f, x, iter) -> norm(∇f(x), Inf) < 1e-4) 297 | x = x0 298 | done = false 299 | init!(M, f, ∇f, x) 300 | for iter = 1:max_iter 301 | x = step!(M, f, ∇f, x); 302 | done = callback(M, f, ∇f, x, iter) 303 | if done; break; end 304 | end 305 | return x 306 | end 307 | 308 | #################### 309 | -------------------------------------------------------------------------------- /Old/code_breaker.jl: -------------------------------------------------------------------------------- 1 | 2 | # See the explanation in this doc: 3 | #https://docs.google.com/document/d/1nTpmZhs-VxpJivrQKIVnvmelc02J0F1ki5ChU0EyG68/edit?usp=sharing 4 | 5 | include("utils.jl") 6 | 7 | """ 8 | make_code_prior(L, A, partial_code) 9 | 10 | Make uniform prior over codewords of length `L` over alphabet `A` given a 11 | `partial_code` vector, which is 0 if element is unknown, and otherwise 12 | contains true value of code at that location. 
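For example, with `L = 2`, `A = 3` and `partial_code = [0, 2]`, the prior puts
mass 1/3 on each of the compatible codes (1,2), (2,2) and (3,2), and zero mass
elsewhere (see `make_code_prior_test` below).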
13 | """ 14 | function make_code_prior(L, A, partial_code) 15 | shape = Tuple(fill(A, L)) 16 | ncodes = A^L 17 | codes = [Base._ind2sub(shape, i) for i in 1:ncodes] 18 | prior = ones(ncodes) 19 | for i = 1:ncodes 20 | for l=1:L 21 | if partial_code[l]>0 && (codes[i][l] != partial_code[l]) 22 | prior[i] = 0 23 | end 24 | end 25 | end 26 | prior = normalize_probdist(prior); 27 | return prior 28 | end 29 | 30 | function make_code_prior_test() 31 | L = 2; A = 3; 32 | partial_code = [0, 2] 33 | code_prior = make_code_prior(L, A, partial_code) 34 | #= Possible codes are as follows, ones compatible with partial_code with * 35 | 1 1 36 | 2 1 37 | 3 1 38 | 1 2 * 39 | 2 2 * 40 | 3 2 * 41 | 1 3 42 | 2 3 43 | 3 3 44 | =# 45 | @test isapprox(code_prior, [0, 0, 0, 0.3, 0.3, 0.3, 0, 0, 0]; atol=1e-1) 46 | end 47 | 48 | #make_code_prior_test() 49 | 50 | function encode_discrete(v, domain) 51 | ndx = findfirst(x -> x==v, domain) 52 | @assert !isnothing(ndx) "value $v not in domain $domain" 53 | return ndx 54 | end 55 | 56 | function hamming_distance(x, c) 57 | return sum(x .!= c) 58 | end 59 | 60 | function hamming_distance_ints(xndx, cndx, L, A) 61 | shape = Tuple(fill(A, L)) 62 | xbits = Base._ind2sub(shape, xndx) 63 | cbits = Base._ind2sub(shape, cndx) 64 | return hamming_distance(xbits, cbits) 65 | end 66 | 67 | 68 | """ 69 | make_CPT_from_fn(f, domain1, domain2, frange) 70 | 71 | Make a conditional proability table from a deterministic 2 argument function 72 | `f` with inputs x1 in `domain1`, x2 in `domain2` and output y in `frange`. 73 | 74 | We assume the domains and range are finite sets. Returns array of form 75 | cpt[x1,x2,y], where the last dimension sums to 1 for each x1, x2. 76 | """ 77 | function make_CPT_from_fn(f, domain1, domain2, frange) 78 | cpt = zeros(length(domain1), length(domain2), length(frange)) 79 | for (x1v, x1i) in enumerate(domain1) 80 | for (x2v, x2i) in enumerate(domain2) 81 | yv = f(x1v, x2v) 82 | yi = encode_discrete(yv, frange) 83 | cpt[x1i, x2i, yi] = 1.0 84 | end 85 | end 86 | return cpt 87 | end 88 | 89 | 90 | function make_hamming_CPT(L, A) 91 | domain = 1:A^L # strings of length L on alphabet A 92 | frange = 0:L # possible values of hamming distance 93 | f(x1, x2) = hamming_distance_ints(x1, x2, L, A) 94 | CPT = make_CPT_from_fn(f, domain, domain, frange) 95 | return CPT 96 | end 97 | 98 | function make_hamming_CPT_test() 99 | L = 2; A = 3; 100 | CPT = make_hamming_CPT(L, A) 101 | @test CPT[:,1,:] == [ 102 | 1 0 0; # hamming([1,1], [1,1])=1 103 | 0 1 0; 104 | 0 1 0; 105 | 0 1 0; 106 | 0 0 1; # hamming([2,2], [1,1])=2 107 | 0 0 1; 108 | 0 1 0; 109 | 0 0 1; 110 | 0 0 1 111 | ] 112 | end 113 | 114 | """ 115 | marginalize_CPT_parent(child_CPT, parent_prior) 116 | 117 | Compute cpt(p1, c) = sum_p2 child_CPT(p1, p2, c) * parent_prior(p1). 118 | """ 119 | function marginalize_CPT_parent(child_CPT, parent_prior) 120 | @assert ndims(child_CPT) == 3 121 | (p1sz, p2sz, csz) = size(child_CPT) 122 | ppsz = length(parent_prior) 123 | errmsg = "parent prior has size $ppsz, should be $p2sz" 124 | @assert (length(parent_prior) == p2sz) errmsg 125 | CPT = zeros(p1sz, csz) 126 | for p1 in 1:p1sz 127 | for c in 1:csz 128 | CPT[p1, c] = sum(child_CPT[p1, :, c] .* parent_prior) 129 | end 130 | end 131 | return CPT 132 | end 133 | 134 | """ 135 | We make a surrogate model for the function y=f(x), 136 | where x is a string in {1,..,A}^L. 
Our surrogate has the form 137 | fhat(x) = argmax_y CPT_y_x[x, y] where 138 | CPT_y_x[x,y] = sum_c CPT_c[c] * CPT_y_xc[x,c,y] 139 | where CPT_c is a prior over hidden code c. 140 | We assume f(x) in {0,1,...,L}. 141 | """ 142 | mutable struct SurrogateModel 143 | CPT_c::Array{Float64,1} 144 | CPT_y_xc::Array{Float64, 3} 145 | CPT_y_x::Array{Float64, 2} 146 | L::Int # length of string 147 | A::Int # alphabet size 148 | fdom::Array{Int,1} # domain of unknown function 149 | frange::Array{Int,1} # range of unknown function 150 | end 151 | 152 | 153 | function encode_x(model, xstr) 154 | xdomain = Tuple(fill(model.A, model.L)) 155 | xndx = Base._sub2ind(xdomain, xstr...) 156 | return xndx 157 | end 158 | 159 | function decode_x(model, xndx) 160 | xdomain = Tuple(fill(model.A, model.L)) 161 | xstr = Base._ind2sub(xdomain, xndx) 162 | return xstr 163 | end 164 | 165 | function encode_y(model, yval) 166 | yndx = encode_discrete(yval, model.frange) 167 | end 168 | 169 | 170 | function make_surrogate(L, A, partial_code) 171 | CPT_c = make_code_prior(L, A, partial_code) 172 | CPT_y_xc = make_hamming_CPT(L, A) 173 | CPT_y_x = marginalize_CPT_parent(CPT_y_xc, CPT_c) 174 | model = SurrogateModel(CPT_c, CPT_y_xc, CPT_y_x, L, A, 1:A^L, 0:L) 175 | return model 176 | end 177 | 178 | function make_surrogate_test() 179 | L = 3; A = 4; partial_code = [1,1,1] 180 | model = make_surrogate(L, A, partial_code) 181 | # Prob the model with query strings x which are increasignly 182 | # far (in Hamming Distance) from the true code of (1,1,1) 183 | xs = [ [1,1,1], [2,1,1], [2,2,1], [2,2,2] ] 184 | ps = [ [1.0,0,0,0], [0,1.0,0,0], [0,0,1.0,0], [0,0,0,1.0]] # 0-3 bits away 185 | for i = 1:length(xs) 186 | xstr = xs[i] 187 | prob_y = ps[i] 188 | xndx = encode_x(model, xstr) 189 | CPT_y = model.CPT_y_x[xndx,:] 190 | @test isapprox(CPT_y, prob_y; atol=0.1) 191 | end 192 | println("tests passed") 193 | end 194 | 195 | function update_surrogate!(model, xstr, yval) 196 | xdomain = fill(model.A, model.L) 197 | xndx = encode_x(model, xstr) 198 | yndx = encode_y(model, yval) 199 | lik_c = model.CPT_y_xc[xndx, :, yndx] 200 | post_c = normalize_probdist(lik_c .* model.CPT_c) 201 | model.CPT_c = post_c 202 | model.CPT_y_x = marginalize_CPT_parent(model.CPT_y_xc, model.CPT_c) 203 | end 204 | 205 | function update_surrogate_test() 206 | # Start with uniform prior. 
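    # (partial_code = [0,0,0], so each of the A^L candidate codes gets prior mass 1/A^L.)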
207 | # Make a lucky guess, which has f(x)=0, which implies code=x 208 | xs = [ [1,1,1], [1,2,3]] 209 | for xstr in xs 210 | L = 3; A = 4; partial_code = [0,0,0] 211 | model = make_surrogate(L, A, partial_code) 212 | xcode = encode_x(model, xstr) 213 | yval = 0 214 | update_surrogate!(model, xstr, yval) 215 | expected_post = zeros(A^L); 216 | expected_post[xcode] = 1.0 217 | @test isapprox(model.CPT_c, expected_post, atol=0.1) 218 | end 219 | println("tests passed") 220 | end 221 | 222 | 223 | function expected_improvement(model, xstr, incumbent_val) 224 | xndx = encode_x(model, xstr) 225 | CPT_y = model.CPT_y_x[xndx,:] 226 | fn(y) = max(0, incumbent_val-y) # if y, rounded corners=5mm}}") 39 | pushPGFPlotsPreamble("\\newcommand{\\vect}[1]{\\boldsymbol{\\mathbf{#1}}}") 40 | pushPGFPlotsPreamble("\\pgfplotsset{") 41 | pushPGFPlotsPreamble(" colormap={pasteljet}{") 42 | pushPGFPlotsPreamble(" rgb=(0.99325,0.90616,0.14394)") 43 | pushPGFPlotsPreamble(" rgb=(0.98387,0.90487,0.13690)") 44 | pushPGFPlotsPreamble(" rgb=(0.97442,0.90359,0.13021)") 45 | pushPGFPlotsPreamble(" rgb=(0.96489,0.90232,0.12394)") 46 | pushPGFPlotsPreamble(" rgb=(0.95530,0.90107,0.11813)") 47 | pushPGFPlotsPreamble(" rgb=(0.94564,0.89982,0.11284)") 48 | pushPGFPlotsPreamble(" rgb=(0.93590,0.89857,0.10813)") 49 | pushPGFPlotsPreamble(" rgb=(0.92611,0.89733,0.10407)") 50 | pushPGFPlotsPreamble(" rgb=(0.91624,0.89609,0.10072)") 51 | pushPGFPlotsPreamble(" rgb=(0.90631,0.89485,0.09813)") 52 | pushPGFPlotsPreamble(" rgb=(0.89632,0.89362,0.09634)") 53 | pushPGFPlotsPreamble(" rgb=(0.88627,0.89237,0.09537)") 54 | pushPGFPlotsPreamble(" rgb=(0.87617,0.89112,0.09525)") 55 | pushPGFPlotsPreamble(" rgb=(0.86601,0.88987,0.09595)") 56 | pushPGFPlotsPreamble(" rgb=(0.85581,0.88860,0.09745)") 57 | pushPGFPlotsPreamble(" rgb=(0.84556,0.88732,0.09970)") 58 | pushPGFPlotsPreamble(" rgb=(0.83527,0.88603,0.10265)") 59 | pushPGFPlotsPreamble(" rgb=(0.82494,0.88472,0.10622)") 60 | pushPGFPlotsPreamble(" rgb=(0.81458,0.88339,0.11035)") 61 | pushPGFPlotsPreamble(" rgb=(0.80418,0.88205,0.11496)") 62 | pushPGFPlotsPreamble(" rgb=(0.79376,0.88068,0.12001)") 63 | pushPGFPlotsPreamble(" rgb=(0.78331,0.87928,0.12540)") 64 | pushPGFPlotsPreamble(" rgb=(0.77285,0.87787,0.13111)") 65 | pushPGFPlotsPreamble(" rgb=(0.76237,0.87642,0.13706)") 66 | pushPGFPlotsPreamble(" rgb=(0.75188,0.87495,0.14323)") 67 | pushPGFPlotsPreamble(" rgb=(0.74139,0.87345,0.14956)") 68 | pushPGFPlotsPreamble(" rgb=(0.73089,0.87192,0.15603)") 69 | pushPGFPlotsPreamble(" rgb=(0.72039,0.87035,0.16260)") 70 | pushPGFPlotsPreamble(" rgb=(0.70990,0.86875,0.16926)") 71 | pushPGFPlotsPreamble(" rgb=(0.69942,0.86712,0.17597)") 72 | pushPGFPlotsPreamble(" rgb=(0.68894,0.86545,0.18272)") 73 | pushPGFPlotsPreamble(" rgb=(0.67849,0.86374,0.18950)") 74 | pushPGFPlotsPreamble(" rgb=(0.66805,0.86200,0.19629)") 75 | pushPGFPlotsPreamble(" rgb=(0.65764,0.86022,0.20308)") 76 | pushPGFPlotsPreamble(" rgb=(0.64726,0.85840,0.20986)") 77 | pushPGFPlotsPreamble(" rgb=(0.63690,0.85654,0.21662)") 78 | pushPGFPlotsPreamble(" rgb=(0.62658,0.85464,0.22335)") 79 | pushPGFPlotsPreamble(" rgb=(0.61629,0.85271,0.23005)") 80 | pushPGFPlotsPreamble(" rgb=(0.60604,0.85073,0.23671)") 81 | pushPGFPlotsPreamble(" rgb=(0.59584,0.84872,0.24333)") 82 | pushPGFPlotsPreamble(" rgb=(0.58568,0.84666,0.24990)") 83 | pushPGFPlotsPreamble(" rgb=(0.57556,0.84457,0.25642)") 84 | pushPGFPlotsPreamble(" rgb=(0.56550,0.84243,0.26288)") 85 | pushPGFPlotsPreamble(" rgb=(0.55548,0.84025,0.26928)") 86 | pushPGFPlotsPreamble(" 
rgb=(0.54552,0.83804,0.27563)") 87 | pushPGFPlotsPreamble(" rgb=(0.53562,0.83579,0.28191)") 88 | pushPGFPlotsPreamble(" rgb=(0.52578,0.83349,0.28813)") 89 | pushPGFPlotsPreamble(" rgb=(0.51599,0.83116,0.29428)") 90 | pushPGFPlotsPreamble(" rgb=(0.50627,0.82879,0.30036)") 91 | pushPGFPlotsPreamble(" rgb=(0.49661,0.82638,0.30638)") 92 | pushPGFPlotsPreamble(" rgb=(0.48703,0.82393,0.31232)") 93 | pushPGFPlotsPreamble(" rgb=(0.47750,0.82144,0.31820)") 94 | pushPGFPlotsPreamble(" rgb=(0.46805,0.81892,0.32400)") 95 | pushPGFPlotsPreamble(" rgb=(0.45867,0.81636,0.32973)") 96 | pushPGFPlotsPreamble(" rgb=(0.44937,0.81377,0.33538)") 97 | pushPGFPlotsPreamble(" rgb=(0.44014,0.81114,0.34097)") 98 | pushPGFPlotsPreamble(" rgb=(0.43098,0.80847,0.34648)") 99 | pushPGFPlotsPreamble(" rgb=(0.42191,0.80577,0.35191)") 100 | pushPGFPlotsPreamble(" rgb=(0.41291,0.80304,0.35727)") 101 | pushPGFPlotsPreamble(" rgb=(0.40400,0.80027,0.36255)") 102 | pushPGFPlotsPreamble(" rgb=(0.39517,0.79748,0.36776)") 103 | pushPGFPlotsPreamble(" rgb=(0.38643,0.79464,0.37289)") 104 | pushPGFPlotsPreamble(" rgb=(0.37778,0.79178,0.37794)") 105 | pushPGFPlotsPreamble(" rgb=(0.36921,0.78889,0.38291)") 106 | pushPGFPlotsPreamble(" rgb=(0.36074,0.78596,0.38781)") 107 | pushPGFPlotsPreamble(" rgb=(0.35236,0.78301,0.39264)") 108 | pushPGFPlotsPreamble(" rgb=(0.34407,0.78003,0.39738)") 109 | pushPGFPlotsPreamble(" rgb=(0.33588,0.77702,0.40205)") 110 | pushPGFPlotsPreamble(" rgb=(0.32780,0.77398,0.40664)") 111 | pushPGFPlotsPreamble(" rgb=(0.31981,0.77091,0.41115)") 112 | pushPGFPlotsPreamble(" rgb=(0.31193,0.76782,0.41559)") 113 | pushPGFPlotsPreamble(" rgb=(0.30415,0.76470,0.41994)") 114 | pushPGFPlotsPreamble(" rgb=(0.29648,0.76156,0.42422)") 115 | pushPGFPlotsPreamble(" rgb=(0.28892,0.75839,0.42843)") 116 | pushPGFPlotsPreamble(" rgb=(0.28148,0.75520,0.43255)") 117 | pushPGFPlotsPreamble(" rgb=(0.27415,0.75199,0.43660)") 118 | pushPGFPlotsPreamble(" rgb=(0.26694,0.74875,0.44057)") 119 | pushPGFPlotsPreamble(" rgb=(0.25986,0.74549,0.44447)") 120 | pushPGFPlotsPreamble(" rgb=(0.25290,0.74221,0.44828)") 121 | pushPGFPlotsPreamble(" rgb=(0.24607,0.73891,0.45202)") 122 | pushPGFPlotsPreamble(" rgb=(0.23937,0.73559,0.45569)") 123 | pushPGFPlotsPreamble(" rgb=(0.23281,0.73225,0.45928)") 124 | pushPGFPlotsPreamble(" rgb=(0.22640,0.72889,0.46279)") 125 | pushPGFPlotsPreamble(" rgb=(0.22012,0.72551,0.46623)") 126 | pushPGFPlotsPreamble(" rgb=(0.21400,0.72211,0.46959)") 127 | pushPGFPlotsPreamble(" rgb=(0.20803,0.71870,0.47287)") 128 | pushPGFPlotsPreamble(" rgb=(0.20222,0.71527,0.47608)") 129 | pushPGFPlotsPreamble(" rgb=(0.19657,0.71183,0.47922)") 130 | pushPGFPlotsPreamble(" rgb=(0.19109,0.70837,0.48228)") 131 | pushPGFPlotsPreamble(" rgb=(0.18578,0.70489,0.48527)") 132 | pushPGFPlotsPreamble(" rgb=(0.18065,0.70140,0.48819)") 133 | pushPGFPlotsPreamble(" rgb=(0.17571,0.69790,0.49103)") 134 | pushPGFPlotsPreamble(" rgb=(0.17095,0.69438,0.49380)") 135 | pushPGFPlotsPreamble(" rgb=(0.16638,0.69086,0.49650)") 136 | pushPGFPlotsPreamble(" rgb=(0.16202,0.68732,0.49913)") 137 | pushPGFPlotsPreamble(" rgb=(0.15785,0.68376,0.50169)") 138 | pushPGFPlotsPreamble(" rgb=(0.15389,0.68020,0.50417)") 139 | pushPGFPlotsPreamble(" rgb=(0.15015,0.67663,0.50659)") 140 | pushPGFPlotsPreamble(" rgb=(0.14662,0.67305,0.50894)") 141 | pushPGFPlotsPreamble(" rgb=(0.14330,0.66946,0.51121)") 142 | pushPGFPlotsPreamble(" rgb=(0.14021,0.66586,0.51343)") 143 | pushPGFPlotsPreamble(" rgb=(0.13734,0.66225,0.51557)") 144 | pushPGFPlotsPreamble(" 
rgb=(0.13469,0.65864,0.51765)") 145 | pushPGFPlotsPreamble(" rgb=(0.13227,0.65501,0.51966)") 146 | pushPGFPlotsPreamble(" rgb=(0.13007,0.65138,0.52161)") 147 | pushPGFPlotsPreamble(" rgb=(0.12809,0.64775,0.52349)") 148 | pushPGFPlotsPreamble(" rgb=(0.12633,0.64411,0.52531)") 149 | pushPGFPlotsPreamble(" rgb=(0.12478,0.64046,0.52707)") 150 | pushPGFPlotsPreamble(" rgb=(0.12344,0.63681,0.52876)") 151 | pushPGFPlotsPreamble(" rgb=(0.12231,0.63315,0.53040)") 152 | pushPGFPlotsPreamble(" rgb=(0.12138,0.62949,0.53197)") 153 | pushPGFPlotsPreamble(" rgb=(0.12064,0.62583,0.53349)") 154 | pushPGFPlotsPreamble(" rgb=(0.12008,0.62216,0.53495)") 155 | pushPGFPlotsPreamble(" rgb=(0.11970,0.61849,0.53635)") 156 | pushPGFPlotsPreamble(" rgb=(0.11948,0.61482,0.53769)") 157 | pushPGFPlotsPreamble(" rgb=(0.11942,0.61114,0.53898)") 158 | pushPGFPlotsPreamble(" rgb=(0.11951,0.60746,0.54022)") 159 | pushPGFPlotsPreamble(" rgb=(0.11974,0.60379,0.54140)") 160 | pushPGFPlotsPreamble(" rgb=(0.12009,0.60010,0.54253)") 161 | pushPGFPlotsPreamble(" rgb=(0.12057,0.59642,0.54361)") 162 | pushPGFPlotsPreamble(" rgb=(0.12115,0.59274,0.54464)") 163 | pushPGFPlotsPreamble(" rgb=(0.12183,0.58905,0.54562)") 164 | pushPGFPlotsPreamble(" rgb=(0.12261,0.58537,0.54656)") 165 | pushPGFPlotsPreamble(" rgb=(0.12346,0.58169,0.54744)") 166 | pushPGFPlotsPreamble(" rgb=(0.12440,0.57800,0.54829)") 167 | pushPGFPlotsPreamble(" rgb=(0.12539,0.57432,0.54909)") 168 | pushPGFPlotsPreamble(" rgb=(0.12645,0.57063,0.54984)") 169 | pushPGFPlotsPreamble(" rgb=(0.12757,0.56695,0.55056)") 170 | pushPGFPlotsPreamble(" rgb=(0.12873,0.56327,0.55123)") 171 | pushPGFPlotsPreamble(" rgb=(0.12993,0.55958,0.55186)") 172 | pushPGFPlotsPreamble(" rgb=(0.13117,0.55590,0.55246)") 173 | pushPGFPlotsPreamble(" rgb=(0.13244,0.55222,0.55302)") 174 | pushPGFPlotsPreamble(" rgb=(0.13374,0.54853,0.55354)") 175 | pushPGFPlotsPreamble(" rgb=(0.13507,0.54485,0.55403)") 176 | pushPGFPlotsPreamble(" rgb=(0.13641,0.54117,0.55448)") 177 | pushPGFPlotsPreamble(" rgb=(0.13777,0.53749,0.55491)") 178 | pushPGFPlotsPreamble(" rgb=(0.13915,0.53381,0.55530)") 179 | pushPGFPlotsPreamble(" rgb=(0.14054,0.53013,0.55566)") 180 | pushPGFPlotsPreamble(" rgb=(0.14194,0.52645,0.55599)") 181 | pushPGFPlotsPreamble(" rgb=(0.14334,0.52277,0.55629)") 182 | pushPGFPlotsPreamble(" rgb=(0.14476,0.51909,0.55657)") 183 | pushPGFPlotsPreamble(" rgb=(0.14618,0.51541,0.55682)") 184 | pushPGFPlotsPreamble(" rgb=(0.14761,0.51173,0.55705)") 185 | pushPGFPlotsPreamble(" rgb=(0.14904,0.50805,0.55725)") 186 | pushPGFPlotsPreamble(" rgb=(0.15048,0.50437,0.55743)") 187 | pushPGFPlotsPreamble(" rgb=(0.15192,0.50069,0.55759)") 188 | pushPGFPlotsPreamble(" rgb=(0.15336,0.49700,0.55772)") 189 | pushPGFPlotsPreamble(" rgb=(0.15482,0.49331,0.55784)") 190 | pushPGFPlotsPreamble(" rgb=(0.15627,0.48962,0.55794)") 191 | pushPGFPlotsPreamble(" rgb=(0.15773,0.48593,0.55801)") 192 | pushPGFPlotsPreamble(" rgb=(0.15919,0.48224,0.55807)") 193 | pushPGFPlotsPreamble(" rgb=(0.16067,0.47854,0.55812)") 194 | pushPGFPlotsPreamble(" rgb=(0.16214,0.47484,0.55814)") 195 | pushPGFPlotsPreamble(" rgb=(0.16362,0.47113,0.55815)") 196 | pushPGFPlotsPreamble(" rgb=(0.16512,0.46742,0.55814)") 197 | pushPGFPlotsPreamble(" rgb=(0.16662,0.46371,0.55812)") 198 | pushPGFPlotsPreamble(" rgb=(0.16813,0.45999,0.55808)") 199 | pushPGFPlotsPreamble(" rgb=(0.16965,0.45626,0.55803)") 200 | pushPGFPlotsPreamble(" rgb=(0.17118,0.45253,0.55797)") 201 | pushPGFPlotsPreamble(" rgb=(0.17272,0.44879,0.55788)") 202 | pushPGFPlotsPreamble(" 
rgb=(0.17427,0.44504,0.55779)") 203 | pushPGFPlotsPreamble(" rgb=(0.17584,0.44129,0.55768)") 204 | pushPGFPlotsPreamble(" rgb=(0.17742,0.43753,0.55756)") 205 | pushPGFPlotsPreamble(" rgb=(0.17902,0.43376,0.55743)") 206 | pushPGFPlotsPreamble(" rgb=(0.18063,0.42997,0.55728)") 207 | pushPGFPlotsPreamble(" rgb=(0.18226,0.42618,0.55712)") 208 | pushPGFPlotsPreamble(" rgb=(0.18390,0.42238,0.55694)") 209 | pushPGFPlotsPreamble(" rgb=(0.18556,0.41857,0.55675)") 210 | pushPGFPlotsPreamble(" rgb=(0.18723,0.41475,0.55655)") 211 | pushPGFPlotsPreamble(" rgb=(0.18892,0.41091,0.55633)") 212 | pushPGFPlotsPreamble(" rgb=(0.19063,0.40706,0.55609)") 213 | pushPGFPlotsPreamble(" rgb=(0.19236,0.40320,0.55584)") 214 | pushPGFPlotsPreamble(" rgb=(0.19410,0.39932,0.55556)") 215 | pushPGFPlotsPreamble(" rgb=(0.19586,0.39543,0.55528)") 216 | pushPGFPlotsPreamble(" rgb=(0.19764,0.39153,0.55497)") 217 | pushPGFPlotsPreamble(" rgb=(0.19943,0.38761,0.55464)") 218 | pushPGFPlotsPreamble(" rgb=(0.20124,0.38367,0.55429)") 219 | pushPGFPlotsPreamble(" rgb=(0.20306,0.37972,0.55393)") 220 | pushPGFPlotsPreamble(" rgb=(0.20490,0.37575,0.55353)") 221 | pushPGFPlotsPreamble(" rgb=(0.20676,0.37176,0.55312)") 222 | pushPGFPlotsPreamble(" rgb=(0.20862,0.36775,0.55268)") 223 | pushPGFPlotsPreamble(" rgb=(0.21050,0.36373,0.55221)") 224 | pushPGFPlotsPreamble(" rgb=(0.21240,0.35968,0.55171)") 225 | pushPGFPlotsPreamble(" rgb=(0.21430,0.35562,0.55118)") 226 | pushPGFPlotsPreamble(" rgb=(0.21621,0.35153,0.55063)") 227 | pushPGFPlotsPreamble(" rgb=(0.21813,0.34743,0.55004)") 228 | pushPGFPlotsPreamble(" rgb=(0.22006,0.34331,0.54941)") 229 | pushPGFPlotsPreamble(" rgb=(0.22199,0.33916,0.54875)") 230 | pushPGFPlotsPreamble(" rgb=(0.22393,0.33499,0.54805)") 231 | pushPGFPlotsPreamble(" rgb=(0.22586,0.33081,0.54731)") 232 | pushPGFPlotsPreamble(" rgb=(0.22780,0.32659,0.54653)") 233 | pushPGFPlotsPreamble(" rgb=(0.22974,0.32236,0.54571)") 234 | pushPGFPlotsPreamble(" rgb=(0.23167,0.31811,0.54483)") 235 | pushPGFPlotsPreamble(" rgb=(0.23360,0.31383,0.54391)") 236 | pushPGFPlotsPreamble(" rgb=(0.23553,0.30953,0.54294)") 237 | pushPGFPlotsPreamble(" rgb=(0.23744,0.30520,0.54192)") 238 | pushPGFPlotsPreamble(" rgb=(0.23935,0.30085,0.54084)") 239 | pushPGFPlotsPreamble(" rgb=(0.24124,0.29648,0.53971)") 240 | pushPGFPlotsPreamble(" rgb=(0.24311,0.29209,0.53852)") 241 | pushPGFPlotsPreamble(" rgb=(0.24497,0.28768,0.53726)") 242 | pushPGFPlotsPreamble(" rgb=(0.24681,0.28324,0.53594)") 243 | pushPGFPlotsPreamble(" rgb=(0.24863,0.27877,0.53456)") 244 | pushPGFPlotsPreamble(" rgb=(0.25043,0.27429,0.53310)") 245 | pushPGFPlotsPreamble(" rgb=(0.25219,0.26978,0.53158)") 246 | pushPGFPlotsPreamble(" rgb=(0.25394,0.26525,0.52998)") 247 | pushPGFPlotsPreamble(" rgb=(0.25565,0.26070,0.52831)") 248 | pushPGFPlotsPreamble(" rgb=(0.25732,0.25613,0.52656)") 249 | pushPGFPlotsPreamble(" rgb=(0.25897,0.25154,0.52474)") 250 | pushPGFPlotsPreamble(" rgb=(0.26057,0.24692,0.52283)") 251 | pushPGFPlotsPreamble(" rgb=(0.26214,0.24229,0.52084)") 252 | pushPGFPlotsPreamble(" rgb=(0.26366,0.23763,0.51876)") 253 | pushPGFPlotsPreamble(" rgb=(0.26515,0.23296,0.51660)") 254 | pushPGFPlotsPreamble(" rgb=(0.26658,0.22826,0.51435)") 255 | pushPGFPlotsPreamble(" rgb=(0.26797,0.22355,0.51201)") 256 | pushPGFPlotsPreamble(" rgb=(0.26931,0.21882,0.50958)") 257 | pushPGFPlotsPreamble(" rgb=(0.27059,0.21407,0.50705)") 258 | pushPGFPlotsPreamble(" rgb=(0.27183,0.20930,0.50443)") 259 | pushPGFPlotsPreamble(" rgb=(0.27301,0.20452,0.50172)") 260 | pushPGFPlotsPreamble(" 
rgb=(0.27413,0.19972,0.49891)") 261 | pushPGFPlotsPreamble(" rgb=(0.27519,0.19490,0.49600)") 262 | pushPGFPlotsPreamble(" rgb=(0.27619,0.19007,0.49300)") 263 | pushPGFPlotsPreamble(" rgb=(0.27713,0.18523,0.48990)") 264 | pushPGFPlotsPreamble(" rgb=(0.27801,0.18037,0.48670)") 265 | pushPGFPlotsPreamble(" rgb=(0.27883,0.17549,0.48340)") 266 | pushPGFPlotsPreamble(" rgb=(0.27957,0.17060,0.48000)") 267 | pushPGFPlotsPreamble(" rgb=(0.28025,0.16569,0.47650)") 268 | pushPGFPlotsPreamble(" rgb=(0.28087,0.16077,0.47290)") 269 | pushPGFPlotsPreamble(" rgb=(0.28141,0.15583,0.46920)") 270 | pushPGFPlotsPreamble(" rgb=(0.28189,0.15088,0.46541)") 271 | pushPGFPlotsPreamble(" rgb=(0.28229,0.14591,0.46151)") 272 | pushPGFPlotsPreamble(" rgb=(0.28262,0.14093,0.45752)") 273 | pushPGFPlotsPreamble(" rgb=(0.28288,0.13592,0.45343)") 274 | pushPGFPlotsPreamble(" rgb=(0.28307,0.13090,0.44924)") 275 | pushPGFPlotsPreamble(" rgb=(0.28319,0.12585,0.44496)") 276 | pushPGFPlotsPreamble(" rgb=(0.28323,0.12078,0.44058)") 277 | pushPGFPlotsPreamble(" rgb=(0.28320,0.11568,0.43611)") 278 | pushPGFPlotsPreamble(" rgb=(0.28309,0.11055,0.43155)") 279 | pushPGFPlotsPreamble(" rgb=(0.28291,0.10539,0.42690)") 280 | pushPGFPlotsPreamble(" rgb=(0.28266,0.10020,0.42216)") 281 | pushPGFPlotsPreamble(" rgb=(0.28233,0.09495,0.41733)") 282 | pushPGFPlotsPreamble(" rgb=(0.28192,0.08967,0.41241)") 283 | pushPGFPlotsPreamble(" rgb=(0.28145,0.08432,0.40741)") 284 | pushPGFPlotsPreamble(" rgb=(0.28089,0.07891,0.40233)") 285 | pushPGFPlotsPreamble(" rgb=(0.28027,0.07342,0.39716)") 286 | pushPGFPlotsPreamble(" rgb=(0.27957,0.06784,0.39192)") 287 | pushPGFPlotsPreamble(" rgb=(0.27879,0.06214,0.38659)") 288 | pushPGFPlotsPreamble(" rgb=(0.27794,0.05632,0.38119)") 289 | pushPGFPlotsPreamble(" rgb=(0.27702,0.05034,0.37572)") 290 | pushPGFPlotsPreamble(" rgb=(0.27602,0.04417,0.37016)") 291 | pushPGFPlotsPreamble(" rgb=(0.27495,0.03775,0.36454)") 292 | pushPGFPlotsPreamble(" rgb=(0.27381,0.03150,0.35885)") 293 | pushPGFPlotsPreamble(" rgb=(0.27259,0.02556,0.35309)") 294 | pushPGFPlotsPreamble(" rgb=(0.27131,0.01994,0.34727)") 295 | pushPGFPlotsPreamble(" rgb=(0.26994,0.01463,0.34138)") 296 | pushPGFPlotsPreamble(" rgb=(0.26851,0.00961,0.33543)") 297 | pushPGFPlotsPreamble(" rgb=(0.26700,0.00487,0.32942)") 298 | pushPGFPlotsPreamble(" }") 299 | pushPGFPlotsPreamble("}"); 300 | --------------------------------------------------------------------------------