├── .gitignore ├── README.md ├── Old ├── main.jl ├── MurphykModule.jl ├── sklearn-demo.jl ├── turing-demo.jl ├── gauss1d-gibbs.jl ├── second-order.jl ├── tmp.jl ├── utils.jl ├── kalman_tracking_demo.jl ├── zigzag.jl ├── flux-demo.jl ├── logreg-opt-demo.jl ├── logreg-binary-opt-demo.jl ├── logreg-binary-opt-old.jl ├── rosenbrock-opt-demo.jl ├── dummy.jl ├── kalman_qe.jl ├── first-order.jl ├── code_breaker.jl ├── indexing.jl └── support_code.jl ├── zygote_demo.jl ├── julia-1-4-colab.ipynb ├── julia-install.txt ├── FactorGraphs ├── gauss-bayes-rule-multi.jl ├── gauss-bayes-rule.jl ├── compose_nodes.jl ├── kalman_smoother.jl ├── kalman_smoother_1d.jl ├── vb_unigauss.jl ├── ep_discrete_kf.jl ├── tmp.jl ├── kalman_filter.jl ├── kalman_filter_1d.jl ├── lin_probit_regression_corrupted_classlabels.jl ├── vb_gmm.jl ├── gauss-bayes-rule-2d.jl ├── structured_mean_field.jl ├── hmm.jl └── kalman_smoother_2d.jl └── notes.txt /.gitignore: -------------------------------------------------------------------------------- 1 | *~ -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # jpml 2 | Julia code for Probabilistic Machine Learning 3 | -------------------------------------------------------------------------------- /Old/main.jl: -------------------------------------------------------------------------------- 1 | include("utils.jl") 2 | using MurphykModule 3 | 4 | println(square(2)) 5 | println(normalize_probdist([1,2,3])) 6 | -------------------------------------------------------------------------------- /Old/MurphykModule.jl: -------------------------------------------------------------------------------- 1 | module MurphykModule 2 | 3 | export square 4 | 5 | square(x) = x^2 6 | 7 | cube(x) = x^3 8 | 9 | end 10 | -------------------------------------------------------------------------------- /zygote_demo.jl: -------------------------------------------------------------------------------- 1 | 2 | 3 | function f(x,w,n) 4 | a = 0.0 5 | for i=1:n 6 | a += w'*x; 7 | end 8 | return a; 9 | end 10 | f2(x,w,n) = n*w'*x; 11 | 12 | D = 3; 13 | w = rand(D); x = rand(D); 14 | @assert isapprox(f(x,w,n), f2(x,w,n)) 15 | 16 | # df/dx = n*w, df/dw = n*x 17 | g = gradient(f,x,w,n)[2] # gradient returns (df/dx, df/dw) 18 | g2 = n*x 19 | @assert isapprox(g, g2) 20 | 21 | println("time :") 22 | n = 10000 23 | @time gradient(f, x, w, n) 24 | 25 | println("done") 26 | -------------------------------------------------------------------------------- /julia-1-4-colab.ipynb: -------------------------------------------------------------------------------- 1 | 2 | { 3 | "nbformat": 4, 4 | "nbformat_minor": 0, 5 | "metadata": { 6 | "colab": { 7 | "name": "Julia on Colab.ipynb", 8 | "version": "0.3.2", 9 | "provenance": [] 10 | }, 11 | "kernelspec": { 12 | "name": "julia-1.4", 13 | "display_name": "Julia 1.4" 14 | }, 15 | "accelerator": "GPU" 16 | }, 17 | "cells": [ 18 | { 19 | "metadata": { 20 | "id": "oMSuTc3pDlHv", 21 | "colab_type": "code", 22 | "colab": {} 23 | }, 24 | "cell_type": "code", 25 | "source": [ 26 | "" 27 | ], 28 | "execution_count": 0, 29 | "outputs": [] 30 | } 31 | ] 32 | } -------------------------------------------------------------------------------- /julia-install.txt: -------------------------------------------------------------------------------- 1 | #https://github.com/Dsantra92/Julia-on-Colab/blob/master/julia_on_collab.ipynb 2 | 
#https://stackoverflow.com/questions/58270424/julia-in-google-colab

!curl -sSL "https://julialang-s3.julialang.org/bin/linux/x64/1.4/julia-1.4.0-linux-x86_64.tar.gz" -o julia.tar.gz
!tar -xzf julia.tar.gz -C /usr --strip-components 1
!rm -rf julia.tar.gz*
!julia -e 'using Pkg; pkg"add IJulia; precompile"'


using Pkg
pkg"add BenchmarkTools; precompile;"
pkg"add CuArrays; precompile;"

using BenchmarkTools
mcpu = rand(2^10, 2^10)
@benchmark mcpu*mcpu


using CuArrays
mgpu = cu(mcpu)
@benchmark CuArrays.@sync mgpu*mgpu


!julia -e 'using Pkg; pkg"add IJulia; add CuArrays; add Flux; precompile"'

--------------------------------------------------------------------------------
/FactorGraphs/gauss-bayes-rule-multi.jl:
--------------------------------------------------------------------------------
# Bayes rule for a linear Gaussian model
# X -> Y, observe Y, infer X
# Y ~ N(A x + b, R)
# X ~ N(0, Q)

#https://github.com/biaslab/ForneyLab.jl/issues/17#issuecomment-460290360

using ForneyLab, LinearAlgebra

function make_model()
    g = FactorGraph()

    nhidden = 4
    nobs = 2
    A = [1 0 0 0; 0 1 0 0]
    b = zeros(Float64, nobs)
    Q = eye(nhidden)
    R = 0.1*eye(nobs)
    y_data = randn(nobs)

    @RV x ~ GaussianMeanVariance(zeros(nhidden), Q)
    @RV obs_noise ~ GaussianMeanVariance(zeros(nobs), R)
    @RV y = A*x + b + obs_noise
    placeholder(y, :y, dims=(nobs,)) # add clamping

    # Return the hidden variable and the data so they are visible outside the function
    return x, y_data
end

x, y_data = make_model()
algo = Meta.parse(sumProductAlgorithm(x))
eval(algo) # Load algorithm
data = Dict(:y => y_data)
marginals = step!(data);
--------------------------------------------------------------------------------
/Old/sklearn-demo.jl:
--------------------------------------------------------------------------------
#https://scikitlearnjl.readthedocs.io/en/latest/quickstart/

using ScikitLearn
using RDatasets: dataset

iris = dataset("datasets", "iris")

# ScikitLearn.jl expects arrays, but DataFrames can also be used - see
# the corresponding section of the manual
X = convert(Array, iris[[:SepalLength, :SepalWidth, :PetalLength, :PetalWidth]])
y = convert(Array, iris[:Species])

@sk_import linear_model: LogisticRegression
model = LogisticRegression(fit_intercept=true)
fit!(model, X, y)

accuracy = sum(predict(model, X) .== y) / length(y)
println("accuracy: $accuracy")

using ScikitLearn.CrossValidation: cross_val_score

# https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/model_selection/_validation.py
cross_val_score(LogisticRegression(), X, y; cv=5)  # 5-fold
--------------------------------------------------------------------------------
/Old/turing-demo.jl:
--------------------------------------------------------------------------------
using Turing
using StatsPlots

# Define a simple Normal model with unknown mean and variance.
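# Generative model, written out for reference (matches the @model block below):
#   s ~ InverseGamma(2, 3)          # unknown variance
#   m ~ Normal(0, sqrt(s))          # unknown mean
#   x ~ Normal(m, sqrt(s)),  y ~ Normal(m, sqrt(s))   # two scalar observations
# Conditioning on x = 1.5 and y = 2 below gives a joint posterior over (s, m).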
@model gdemo(x, y) = begin
    s ~ InverseGamma(2,3)
    m ~ Normal(0,sqrt(s))
    x ~ Normal(m, sqrt(s))
    y ~ Normal(m, sqrt(s))
end

# Run sampler, collect results
niters = 10
nparticles = 5
x = 1.5; y = 2;
@info "Starting SMC"
c1 = sample(gdemo(x, y), SMC(niters))
@info "Starting PG"
c2 = sample(gdemo(x, y), PG(nparticles, niters))
@info "Starting HMC"
c3 = sample(gdemo(x, y), HMC(niters, 0.1, 5))
@info "Starting Gibbs"
c4 = sample(gdemo(x, y), Gibbs(niters, PG(10, 2, :m), HMC(2, 0.1, 5, :s)))
@info "Starting HMCDA"
c5 = sample(gdemo(x, y), HMCDA(niters, 0.15, 0.65))
@info "Starting NUTS"
c6 = sample(gdemo(x, y), NUTS(niters, 0.65))
@info "Done"
# Summarise results (currently requires the master branch from MCMCChain)
describe(c3)

# Plot and save results
p = plot(c3)
savefig("gdemo-plot.png")
--------------------------------------------------------------------------------
/Old/gauss1d-gibbs.jl:
--------------------------------------------------------------------------------
#https://discourse.julialang.org/t/learning-bayesian-data-analysis-using-julia/5370/14

# Gibbs sampling for a 1d Gaussian with unknown mean and precision:
# mu ~ N(mu0, 1/tau0)
# tau = 1/sigma^2 ~ Ga(alpha, beta)
# xn ~ N(mu, sigma)

using Distributions, Statistics, Printf

function mc(x, iterations, μ0 = 0., τ0 = 1e-7, α = 0.0001, β = 0.0001)
    n = length(x)
    sumx = sum(x)
    μ, τ = sumx/n, 1/var(x)
    μs, τs = Float64[], Float64[]
    for i in 1:iterations
        μ = rand(Normal((τ0*μ0 + τ*sumx)/(τ0 + n*τ), 1/sqrt(τ0 + n*τ)))
        τ = rand(Gamma(α + n/2, 1/(β + 0.5*sum((xᵢ-μ)^2 for xᵢ in x))))
        push!(μs, μ)
        push!(τs, τ)
    end
    μs, τs
end

n = 100
mu = 0.2
sigma = 1.7^(-0.5)
x = rand(Normal(mu, sigma), n)
nsamples = 1000
μs, τs = mc(x, nsamples)
println("mean μ = ", mean(μs), " ± ", std(μs), ", truth = ", mu)
@printf("mean μ = %5.3f ± %5.3f, truth = %5.3f\n", mean(μs), std(μs), mu)
println("precision τ = ", mean(τs), " ± ", std(τs), ", truth = ", 1/sigma^2)
--------------------------------------------------------------------------------
/FactorGraphs/gauss-bayes-rule.jl:
--------------------------------------------------------------------------------
# Bayes rule for a linear Gaussian model
# X -> Y, observe Y, infer X
# Y ~ N(a x + b, R)
# X ~ N(0, Q)

using ForneyLab, LinearAlgebra


###################### Scalar example
g = FactorGraph()
A = 0.2
b = 0.0
Q = 1
R = 0.
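# Note: R = 0. makes the observation noiseless, so the posterior on x collapses onto
# the single point x = (y_data - b)/A = 6.0; use a strictly positive R to see a
# genuine compromise between the prior and the likelihood.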
15 | y_data = 1.2 16 | 17 | @RV x ~ GaussianMeanVariance(0.0, Q) 18 | @RV n_t ~ GaussianMeanVariance(0.0, R) 19 | @RV y = A*x + b + n_t 20 | placeholder(y, :y) 21 | 22 | println("generating inference code") 23 | algo = Meta.parse(sumProductAlgorithm(x)) 24 | println("Compiling") 25 | eval(algo) # Load algorithm 26 | 27 | data = Dict(:y => y_data) 28 | 29 | println("running inference") 30 | marginals = step!(data); 31 | 32 | #################### 33 | # Multivariate example 34 | #https://github.com/biaslab/ForneyLab.jl/issues/17 35 | 36 | 37 | g = FactorGraph() 38 | 39 | nhidden = 4 40 | nobs = 2 41 | A = [1 0 0 0; 42 | 0 1 0 0] 43 | b = zeros(Float64, nobs) 44 | Q = eye(nhidden) 45 | R = 0.1*eye(nobs) 46 | y_data = randn(nobs) 47 | 48 | @RV x ~ GaussianMeanVariance(zeros(nhidden), Q) 49 | @RV obs_noise ~ GaussianMeanVariance(zeros(nobs), R) 50 | @RV y = A*x + b + obs_noise 51 | placeholder(y, :y, dims=(nobs,)) # add clamping 52 | 53 | algo = Meta.parse(sumProductAlgorithm(x)) 54 | eval(algo) # Load algorithm 55 | data = Dict(:y => y_data) 56 | marginals = step!(data); 57 | post = marginals[:x] 58 | -------------------------------------------------------------------------------- /FactorGraphs/compose_nodes.jl: -------------------------------------------------------------------------------- 1 | 2 | # Composite nodes 3 | #https://github.com/biaslab/ForneyLab.jl/blob/master/demo/6_composite_nodes.ipynb 4 | 5 | using ForneyLab 6 | 7 | # Define factor graph for x1 = x0 + b*u1, where x0 and u1 have Gaussian priors, and b is a constant. 8 | # This is a part of the information filter graph from the introduction. 9 | g = FactorGraph() 10 | 11 | b = [1.0; 0.5] 12 | 13 | @RV x_0 ~ GaussianMeanVariance(ones(2), eye(2)) 14 | @RV u_1 ~ GaussianMeanVariance(1.0, 1.0) 15 | @RV x_1 = x_0 + b*u_1; 16 | 17 | flat_schedule = sumProductSchedule(x_1) 18 | 19 | ForneyLab.draw(g, schedule=flat_schedule) # Inspect the resulting schedule 20 | 21 | println(flat_schedule) 22 | 23 | # Define a composite node for z = x + b*y 24 | @composite GainAddition (z, x, y) begin 25 | # Specify the 'internal factor graph' of the GainAddion composite node. 26 | # z, x, and y can be used as if they are existing Variables in this block. 
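    # (The point of the composite is to hide the internal subgraph z = x + b*y behind a
    # single GainAddition factor, so that later graphs can use it as one node and,
    # further below, register a shortcut sum-product rule for it instead of scheduling
    # the internal additions and gains separately.)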
27 | b = [1.0; 0.5] 28 | 29 | @RV z = x + b*y 30 | end 31 | 32 | g2 = FactorGraph() 33 | 34 | @RV x_0 ~ GaussianMeanVariance(ones(2), eye(2)) 35 | @RV u_1 ~ GaussianMeanVariance(1.0, 1.0) 36 | @RV x_1 ~ GainAddition(x_0, u_1); 37 | 38 | composite_schedule = sumProductSchedule(x_1) 39 | 40 | ForneyLab.draw(g2, schedule=composite_schedule) 41 | 42 | println(composite_schedule) 43 | 44 | println(composite_schedule[end].internal_schedule) 45 | 46 | @sumProductRule(:node_type => GainAddition, # our custom composite node 47 | :outbound_type => Message{GaussianMeanPrecision}, # this rule produces a GaussianMeanPrecision msg 48 | :inbound_types => (Nothing, Message{Gaussian}, Message{Gaussian}), # msg towards first interface, incoming types 49 | :name => SPGainAdditionOutVGG) # name of the update rule; 50 | 51 | shortcut_schedule = sumProductSchedule(x_1) 52 | println(shortcut_schedule) 53 | -------------------------------------------------------------------------------- /FactorGraphs/kalman_smoother.jl: -------------------------------------------------------------------------------- 1 | #https://github.com/biaslab/ForneyLab.jl/blob/master/demo/2_state_estimation_forward_backward.ipynb 2 | # Forward backward smoothing 3 | 4 | # Data 5 | Random.seed!(1) 6 | n_samples = 20 7 | x_data = [t for t=1:n_samples] # State 8 | y_data = x_data + sqrt(200.0)*randn(n_samples); # Noisy observations of state 9 | 10 | using ForneyLab 11 | 12 | g = FactorGraph() 13 | 14 | # Prior statistics 15 | m_x_0 = placeholder(:m_x_0) 16 | v_x_0 = placeholder(:v_x_0) 17 | 18 | # State prior 19 | @RV x_0 ~ GaussianMeanVariance(m_x_0, v_x_0) 20 | 21 | # Transition and observation model 22 | x = Vector{Variable}(undef, n_samples) 23 | y = Vector{Variable}(undef, n_samples) 24 | 25 | x_t = x_0 26 | for t = 1:n_samples 27 | global x_t 28 | @RV n_t ~ GaussianMeanVariance(0.0, 200.0) # observation noise 29 | @RV x[t] = x_t + 1.0 30 | # Name variable for ease of lookup 31 | sym = Symbol("x_", t) 32 | x[t].id = sym #:x_t; 33 | println(sym) 34 | @RV y[t] = x[t] + n_t 35 | 36 | # Data placeholder 37 | placeholder(y[t], :y, index=t) 38 | 39 | # Reset state for next step 40 | x_t = x[t] 41 | end 42 | 43 | println("generating inference code") 44 | algo = Meta.parse(sumProductAlgorithm(x)) 45 | println("Compiling") 46 | eval(algo) # Load algorithm 47 | 48 | # Prepare data dictionary and prior statistics 49 | data = Dict(:y => y_data, 50 | :m_x_0 => 0.0, 51 | :v_x_0 => 1000.0) 52 | 53 | # Execute algorithm 54 | println("running forwards backwads") 55 | marginals = step!(data); 56 | # Extract posterior statistics 57 | m_x = [mean(marginals[:x_*t]) for t = 1:n_samples] 58 | v_x = [var(marginals[:x_*t]) for t = 1:n_samples] 59 | 60 | using Plots; pyplot() 61 | 62 | xs = 1:n_samples 63 | scatter(xs, y_data, color=[:blue], label="y") 64 | plot!(xs, x_data, color=[:black], label="x") 65 | plot!(xs, m_x, color=[:red], label="estimated x") 66 | #grid("on") 67 | ys = m_x; l = sqrt.(v_x); u = sqrt.(v_x); 68 | #fill_between(xs, y.-l, y.+_u, color="b", alpha=0.3) 69 | plot!([ys ys], fillrange=[ys.-l ys.+u], fillalpha=0.3, c=:red) 70 | xlabel!("t") 71 | fname = "Figures/forney-kalman-smoother.png" 72 | savefig(fname) 73 | -------------------------------------------------------------------------------- /FactorGraphs/kalman_smoother_1d.jl: -------------------------------------------------------------------------------- 1 | #https://github.com/biaslab/ForneyLab.jl/blob/master/demo/2_state_estimation_forward_backward.ipynb 2 | # Forward backward smoothing 3 
| 4 | # Data 5 | Random.seed!(1) 6 | n_samples = 10 7 | x_data = [t for t=1:n_samples] # State 8 | y_data = x_data + sqrt(200.0)*randn(n_samples); # Noisy observations of state 9 | 10 | using ForneyLab 11 | 12 | g = FactorGraph() 13 | 14 | # Prior statistics 15 | #m_x_0 = placeholder(:m_x_0) 16 | #v_x_0 = placeholder(:v_x_0) 17 | #@RV x_0 ~ GaussianMeanVariance(m_x_0, v_x_0) 18 | @RV x_0 ~ GaussianMeanVariance(0, 1000) 19 | 20 | # Transition and observation model 21 | x = Vector{Variable}(undef, n_samples) 22 | y = Vector{Variable}(undef, n_samples) 23 | 24 | x_prev = x_0 25 | for t = 1:n_samples 26 | global x_prev 27 | @RV n_t ~ GaussianMeanVariance(0.0, 200.0) # observation noise 28 | @RV x[t] = x_prev + 1.0 29 | # Name variable for ease of lookup 30 | sym = Symbol("x_", t) 31 | x[t].id = sym #:x_t; 32 | @RV y[t] = x[t] + n_t 33 | 34 | # Data placeholder 35 | placeholder(y[t], :y, index=t) 36 | 37 | # Reset state for next step 38 | x_prev = x[t] 39 | end 40 | 41 | println("generating inference code") 42 | algo = Meta.parse(sumProductAlgorithm(x)) 43 | println("Compiling") 44 | eval(algo) # Load algorithm 45 | 46 | # Prepare data dictionary and prior statistics 47 | data = Dict(:y => y_data) 48 | 49 | #= 50 | data = Dict(:y => y_data, 51 | :m_x_0 => 0.0, 52 | :v_x_0 => 1000.0) 53 | =# 54 | 55 | # Execute algorithm 56 | println("running forwards backwads") 57 | marginals = step!(data); 58 | # Extract posterior statistics 59 | m_x = [mean(marginals[:x_*t]) for t = 1:n_samples] 60 | v_x = [var(marginals[:x_*t]) for t = 1:n_samples] 61 | 62 | 63 | 64 | import Plots; 65 | # pyplot() 66 | xs = 1:n_samples 67 | Plots.scatter(xs, y_data, color=[:blue], label="y") 68 | Plots.plot!(xs, x_data, color=[:black], label="x") 69 | Plots.plot!(xs, m_x, color=[:red], label="estimated x") 70 | #grid("on") 71 | ys = m_x; l = sqrt.(v_x); u = sqrt.(v_x); 72 | #fill_between(xs, y.-l, y.+_u, color="b", alpha=0.3) 73 | Plots.plot!([ys ys], fillrange=[ys.-l ys.+u], fillalpha=0.3, c=:red) 74 | Plots.xlabel!("t") 75 | gcf() 76 | -------------------------------------------------------------------------------- /FactorGraphs/vb_unigauss.jl: -------------------------------------------------------------------------------- 1 | #https://github.com/biaslab/ForneyLab.jl/blob/master/demo/3_variational_estimation_iid_gaussian.ipynb 2 | # Variational Bayes for a 1d Gaussian 3 | 4 | # Generate toy data set 5 | Random.seed!(1) 6 | n = 5 7 | m_data = 3.0 8 | w_data = 4.0 9 | y_data = sqrt(1/w_data)*randn(n) .+ m_data; 10 | 11 | using(ForneyLab) 12 | 13 | g = FactorGraph() 14 | 15 | # Priors 16 | @RV m ~ GaussianMeanVariance(0.0, 100.0) 17 | @RV w ~ Gamma(0.01, 0.01) 18 | 19 | # Observarion model 20 | y = Vector{Variable}(undef, n) 21 | for i = 1:n 22 | @RV y[i] ~ GaussianMeanPrecision(m, w) 23 | placeholder(y[i], :y, index=i) 24 | end 25 | 26 | # Specify recognition factorization 27 | q = RecognitionFactorization(m, w, ids=[:M, :W]) 28 | 29 | # Inspect the subgraph for m 30 | #ForneyLab.draw(q.recognition_factors[:M]) 31 | 32 | # Generate the variational update algorithms for each recognition factor 33 | algo = variationalAlgorithm(q) 34 | println(algo) 35 | 36 | algo_F = freeEnergyAlgorithm(q) 37 | println(algo_F) 38 | 39 | # Load algorithms 40 | eval(Meta.parse(algo)) 41 | eval(Meta.parse(algo_F)); 42 | 43 | data = Dict(:y => y_data) 44 | 45 | # Initial recognition distributions 46 | marginals = Dict(:m => vague(GaussianMeanVariance), 47 | :w => vague(Gamma)) 48 | 49 | n_its = 2*n 50 | F = Vector{Float64}(undef, n_its) # 
Initialize vector for storing Free energy 51 | m_est = Vector{Float64}(undef, n_its) 52 | w_est = Vector{Float64}(undef, n_its) 53 | for i = 1:n_its 54 | stepM!(data, marginals) 55 | stepW!(data, marginals) 56 | 57 | # Store free energy 58 | F[i] = freeEnergy(data, marginals) 59 | 60 | # Store intermediate estimates 61 | m_est[i] = mean(marginals[:m]) 62 | w_est[i] = mean(marginals[:w]) 63 | end 64 | 65 | 66 | 67 | using Plots; pyplot() 68 | 69 | # Plot free energy to check for convergence 70 | plot(1:n_its, F, c=:black, lab="Free energy") 71 | xlabel!("Iteration") 72 | ylabel!("F") 73 | 74 | # Plot estimated mean 75 | ys = m_est; l = 1.0 ./ sqrt.(w_est); u = 1.0 ./ sqrt.(w_est); 76 | plot([ys ys], fillrange=[ys.-l ys.+u], fillalpha=0.3, c=:red, lab="μ") 77 | ylabel!("Estimate") 78 | m_mle = mean(y_data) 79 | m_emp = fill(m_mle, n_its) 80 | plot!(1:n_its, m_emp, c=:blue, lab="\overline{μ}") 81 | -------------------------------------------------------------------------------- /notes.txt: -------------------------------------------------------------------------------- 1 | yc1 = full(sparse(y,1:100,1f0)) 2 | 3 | # Weight initialization for multiple layers: h=array of layer sizes 4 | # Output is an array [w0,b0,w1,b1,...,wn,bn] where wi,bi is the weight matrix and bias vector for the i'th layer 5 | function winit(h...) # use winit(x,h1,h2,...,hn,y) for n hidden layer model 6 | w = Any[] 7 | for i=2:length(h) 8 | push!(w, xavier(h[i],h[i-1])) 9 | push!(w, zeros(h[i],1)) 10 | end 11 | map(Atype, w) 12 | end; 13 | 14 | function convnet(w,x; pdrop=(0,0,0)) # pdrop[1]:input, pdrop[2]:conv, pdrop[3]:fc 15 | for i=1:2:length(w) 16 | if ndims(w[i]) == 4 # convolutional layer 17 | x = dropout(x, pdrop[i==1?1:2]) 18 | x = conv4(w[i],x) .+ w[i+1] 19 | x = pool(relu.(x)) 20 | elseif ndims(w[i]) == 2 # fully connected layer 21 | x = dropout(x, pdrop[i==1?1:3]) 22 | x = w[i]*mat(x) .+ w[i+1] 23 | if i < length(w)-1; x = relu.(x); end 24 | else 25 | error("Unknown layer type: $(size(w[i]))") 26 | end 27 | end 28 | return x 29 | end; 30 | 31 | # Read the imdb dictionary and print the words 32 | imdbvocab = Array{String}(length(imdbdict)) 33 | for (k,v) in imdbdict; imdbvocab[v]=k; end 34 | map(a->imdbvocab[a], xtrn) 35 | 36 | function onehotrows(idx, embeddings) 37 | nrows,ncols = length(idx), size(embeddings,1) 38 | z = zeros(Float32,nrows,ncols) 39 | @inbounds for i=1:nrows 40 | z[i,idx[i]] = 1 41 | end 42 | oftype(AutoGrad.getval(embeddings),z) 43 | end 44 | 45 | #"/home/kpmurphy/.julia/conda/3/bin" 46 | # push!(LOAD_PATH, 47 | 48 | 49 | https://github.com/TuringLang/TuringTutorials/blob/master/3_BayesNN.ipynb 50 | 51 | https://discourse.julialang.org/t/learning-bayesian-data-analysis-using-julia/5370/23 52 | 53 | https://discourse.julialang.org/t/psa-replacement-of-ind2sub-sub2ind-in-julia-0-7/14666 54 | 55 | RSA key with id efb66595564c5646f8fae19c4d619d60bc6b850d not found 56 | 57 | soss.jl: https://cscherrer.github.io/ 58 | mxnet J1.0: https://github.com/apache/incubator-mxnet/issues/13836 59 | 60 | https://julialang.org/learning/ 61 | http://ucidatascienceinitiative.github.io/IntroToJulia/ 62 | 63 | using NBInclude 64 | @nbinclude("myfile.ipynb") 65 | -------------------------------------------------------------------------------- /FactorGraphs/ep_discrete_kf.jl: -------------------------------------------------------------------------------- 1 | 2 | # Expectation propogation for a 1D linear Gaussian SSM with binary observations 3 | 4 | 
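# Generative model used below: x_t = x_{t-1} + u + N(0, 0.01) with drift u = 0.1,
# and binary observations y_t ~ Bernoulli(σ(x_t)). EP is used because the sigmoid
# observation factor is not conjugate to the Gaussian state messages.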
#https://github.com/biaslab/ForneyLab.jl/blob/master/demo/5_expectation_propagation.ipynb 5 | 6 | 7 | using SpecialFunctions 8 | using Random 9 | # Generate data set 10 | 11 | Random.seed!(123) 12 | n_samples = 40 13 | σ(x) = 0.5 + 0.5*erf.(x./sqrt(2)) 14 | 15 | u_data = 0.1 16 | x_data = [] 17 | y_data = [] 18 | x_prev = -2.0 19 | for t=1:n_samples 20 | global x_prev 21 | push!(x_data, x_prev + u_data + sqrt(0.01)*randn()) # State transition 22 | push!(y_data, σ(x_data[end]) > rand()); # Observation 23 | x_prev = x_data[end] 24 | end 25 | 26 | using ForneyLab 27 | 28 | g = FactorGraph() 29 | 30 | # State prior 31 | @RV x_0 ~ GaussianMeanVariance(0.0, 100.0) 32 | 33 | x = Vector{Variable}(undef, n_samples) 34 | d = Vector{Variable}(undef, n_samples) 35 | y = Vector{Variable}(undef, n_samples) 36 | x_t_min = x_0 37 | for t = 1:n_samples 38 | global x_t_min 39 | @RV d[t] ~ GaussianMeanVariance(u_data, 0.01) 40 | @RV x[t] = x_t_min + d[t] 41 | @RV y[t] ~ Sigmoid(x[t]) 42 | 43 | # Data placeholder 44 | placeholder(y[t], :y, index=t) 45 | 46 | # Reset state for next step 47 | x_t_min = x[t] 48 | end 49 | 50 | println("generating code") 51 | algo = expectationPropagationAlgorithm(x); 52 | println(algo) # Uncomment to inspect algorithm code 53 | 54 | println("compiling code") 55 | eval(Meta.parse(algo)); 56 | 57 | messages = init() 58 | marginals = Dict() 59 | data = Dict(:y => y_data) 60 | 61 | n_its = 4*n_samples 62 | for i = 1:n_its 63 | println("iteration $i") 64 | step!(data, marginals, messages) 65 | end 66 | 67 | 68 | using PyPlot 69 | 70 | clf() 71 | # Extract posterior statistics 72 | m_x = [mean(marginals[:x_*t]) for t = 1:n_samples] 73 | v_x = [var(marginals[:x_*t]) for t = 1:n_samples] 74 | 75 | plot(collect(1:n_samples), x_data, "k--", label="true x") 76 | plot(collect(1:n_samples), m_x, "b-", label="estimated x") 77 | fill_between(collect(1:n_samples), m_x-sqrt.(v_x), m_x+sqrt.(v_x), color="b", alpha=0.3); 78 | grid("on") 79 | xlabel("t") 80 | xlim(1, n_samples) 81 | ylim(-2, 2) 82 | legend(loc=7) 83 | 84 | ax = gca() 85 | ax[:twinx]() 86 | plot(collect(1:n_samples), y_data, "b*", label="y") 87 | yticks([0.0, 1.0], ["False", "True"]); 88 | ylim(-0.1, 1.1); 89 | gcf() 90 | -------------------------------------------------------------------------------- /Old/second-order.jl: -------------------------------------------------------------------------------- 1 | # Based on 2 | #https://github.com/sisl/algforopt-notebooks/blob/master/second-order.ipynb 3 | 4 | using LinearAlgebra 5 | 6 | include("first-order.jl") 7 | 8 | function _line_search(f, x, d) 9 | d = normalize(d) 10 | objective = α -> f(x + α*d) 11 | v, α = f(x), 1e-6 12 | while f(x + α*d) < v 13 | v = f(x + α*d) 14 | α += 1e-6 15 | end 16 | return x + α*d 17 | end 18 | 19 | #abstract type DescentMethod end 20 | mutable struct DFP <: DescentMethod 21 | Q::Matrix{Real} 22 | end 23 | DFP() = DFP(Matrix{Real}(undef, 0, 0)) 24 | function init!(M::DFP, f, ∇f, x) 25 | M.Q = Matrix(1.0I, length(x), length(x)) 26 | return M 27 | end 28 | function step!(M::DFP, f, ∇f, x) 29 | Q, g = M.Q, ∇f(x) 30 | x′ = _line_search(f, x, -Q*g) 31 | g′ = ∇f(x′) 32 | δ = x′ - x 33 | γ = g′ - g 34 | Q[:] = Q - Q*γ*γ'*Q/(γ'*Q*γ) + δ*δ'/(δ'*γ) 35 | return x′ 36 | end 37 | 38 | mutable struct BFGS <: DescentMethod 39 | Q::Matrix{Real} 40 | end 41 | BFGS() = BFGS(Matrix{Real}(undef, 0, 0)) 42 | function init!(M::BFGS, f, ∇f, x) 43 | M.Q = Matrix(1.0I, length(x), length(x)) 44 | return M 45 | end 46 | function step!(M::BFGS, f, ∇f, x) 47 | Q, g = M.Q, ∇f(x) 48 | 
x′ = _line_search(f, x, -Q*g) 49 | g′ = ∇f(x′) 50 | δ = x′ - x 51 | γ = g′ - g 52 | Q[:] = Q - (δ*γ'*Q + Q*γ*δ')/(δ'*γ) + (1 + (γ'*Q*γ)/(δ'*γ))[1]*(δ*δ')/(δ'*γ) 53 | return x′ 54 | end 55 | 56 | mutable struct LBFGS <: DescentMethod 57 | m::Int # memory length 58 | δs::Array{Vector{Real},1} 59 | γs::Array{Vector{Real},1} 60 | qs::Array{Vector{Real},1} 61 | end 62 | LBFGS(m) = LBFGS(m, Array{Vector{Real},1}(), Array{Vector{Real},1}(), Array{Vector{Real},1}()) 63 | function init!(M::LBFGS, f, ∇f, x) 64 | M.δs = [] 65 | M.γs = [] 66 | M.qs = [] 67 | return M 68 | end 69 | function step!(M::LBFGS, f, ∇f, x) 70 | δs, γs, qs, g = M.δs, M.γs, M.qs, ∇f(x) 71 | m = length(δs) 72 | if m > 0 73 | q = g 74 | for i in m : -1 : 1 75 | qs[i] = copy(q) 76 | q -= (δs[i]⋅q)/(γs[i]⋅δs[i])*γs[i] 77 | end 78 | z = (γs[m] .* δs[m] .* q) / (γs[m]⋅γs[m]) 79 | for i in 1 : m 80 | z += δs[i]*(δs[i]⋅qs[i] - γs[i]⋅z)/(γs[i]⋅δs[i]) 81 | end 82 | x′ = _line_search(f, x, -z) 83 | else 84 | x′ = _line_search(f, x, -g) 85 | end 86 | g′ = ∇f(x′) 87 | push!(δs, x′ - x); 88 | push!(γs, g′ - g) 89 | push!(qs, zeros(length(x))) 90 | while length(δs) > M.m 91 | popfirst!(δs); popfirst!(γs); popfirst!(qs) 92 | end 93 | return x′ 94 | end 95 | -------------------------------------------------------------------------------- /Old/tmp.jl: -------------------------------------------------------------------------------- 1 | # Examples of how to fit a binary logistic regression model 2 | 3 | #https://fluxml.ai/Flux.jl/v0.1.1/examples/logreg.html 4 | #http://marubon-ds.blogspot.com/2018/05/deep-learning-with-julia-introduction.html 5 | 6 | #https://github.com/FluxML/Flux.jl/blob/1cf37ab9eb806368d5702ab8053d5bc5b46180cf/src/layers/stateless.jl 7 | 8 | #https://github.com/FluxML/NNlib.jl/blob/master/src/logsoftmax.jl 9 | 10 | #https://www.linkedin.com/pulse/creating-deep-neural-network-model-learn-handwritten-digits-mike-gold/ 11 | 12 | 13 | import Flux 14 | 15 | 16 | function logreg_binary_sample(w::Array{Float64,1}, X::Array{Float64,2}) 17 | DX, N = size(X) 18 | DW = length(w) 19 | @assert DX == DW 20 | p = [Flux.sigmoid(sum(w .* X[:,i])) for i in 1:N] 21 | labels = p .< rand(N) 22 | #W = reshape(w, (1, DW)) 23 | #labels = (vec(Flux.sigmoid.(W * X)) .< rand(N)) 24 | return labels 25 | end 26 | 27 | 28 | function make_test_data_binary() 29 | Random.seed!(1) 30 | D = 4; 31 | N = 5; 32 | X = randn(D, N) # note that examples are stored in the columns 33 | w = randn(D) 34 | y = logreg_binary_sample(w, X) 35 | return w, X, y 36 | end 37 | 38 | function logreg_binary_nll(w, X, y) 39 | D, N = size(X) 40 | #p1 = [Flux.sigmoid(sum(w .* X[:,i])) for i in 1:N] 41 | W = reshape(w, (1, D)) 42 | p = vec(Flux.sigmoid.(W * X)) # 1xN->N vector 43 | #@assert isapprox(vec(p1), vec(p)) 44 | return -sum(y .* log.(p) + (1 .- y) .* log.(1 .- p)) / N 45 | end 46 | 47 | function logreg_binary_nll_test() 48 | w, X, y = make_test_data_binary() 49 | D, N = size(X) 50 | nll1 = logreg_binary_nll(w, X, y) 51 | W = reshape(w, (1, D)); 52 | logits = vec(W * X) 53 | nll2 = sum(Flux.logitbinarycrossentropy.(logits, y)) / N 54 | @test isapprox(nll1, nll2) 55 | end 56 | 57 | function logreg_binary_nll_grad(w, X, y) 58 | D, N = size(X) 59 | g = zeros(Float64,D) 60 | for i=1:N 61 | x = X[:,i] 62 | p = Flux.sigmoid(sum(w .* x)) 63 | g[:] = g[:] + (p - y[i]) .* x 64 | end 65 | return g / N 66 | end 67 | 68 | function logreg_binary_nll_grad_test() 69 | w0, X, y = make_test_data_binary() 70 | g1 = logreg_binary_nll_grad(w0, X, y) 71 | g2 = Flux.gradient(w -> logreg_binary_nll(w, 
X, y), w0)[1] 72 | @test isapprox(g1, g2) 73 | end 74 | 75 | 76 | import Optim 77 | function logreg_fit_test() 78 | W, X, Y = make_test_data() 79 | w0 = vec(randn(size(W))) 80 | objective(w) = logreg_nll(w, X, Y) 81 | result = Optim.optimize(objective, w0, Optim.LBFGS()) 82 | West = reshape(Optim.minimizer(result), (C,D)) 83 | ncalls = result.iterations 84 | end 85 | 86 | logreg_binary_nll_test() 87 | logreg_binary_nll_grad_test() 88 | -------------------------------------------------------------------------------- /FactorGraphs/tmp.jl: -------------------------------------------------------------------------------- 1 | # Bayes rule for a linear Gaussian model 2 | # X -> Y, observe Y, infer X 3 | # Y ~ N(A x + b, R) 4 | # X ~ N(mu, Q) 5 | 6 | # We consider the example from MLAPP sec 4.4.2.2 7 | # where A=I, b=0, mu=[0.5 0.5], Q=0.1I, R=0.1*[2 1; 1 1] 8 | #https://github.com/probml/pmtk3/blob/master/demos/gaussInferParamsMean2d.m 9 | 10 | using ForneyLab 11 | import LinearAlgebra:Diagonal 12 | using Distributions 13 | 14 | function make_model() 15 | g = FactorGraph() 16 | 17 | hidden_dim = 2 18 | obs_dim = 2 19 | Q = 0.1*eye(hidden_dim) 20 | mu_x = [0.5, 0.5] 21 | A = eye(2) 22 | b = [0.0, 0.0] 23 | R = 0.1 .* [2 1; 1 1] 24 | y_mean = A*mu_x + b 25 | y_dist = Distributions.MvNormal(y_mean, R) 26 | n_data = 10 27 | y_data_all = rand(y_dist, n_data) # n_data x obs_dim 28 | y_data = vec(mean(y_data_all, dims=2)) 29 | 30 | @RV x ~ GaussianMeanVariance(mu_x, Q) 31 | mu_y = A*x + b 32 | @RV y ~ GaussianMeanVariance(mu_y, R) 33 | placeholder(y, :y, dims=(obs_dim,)) # add clamping 34 | 35 | return x, y, y_data 36 | end 37 | 38 | x, y, y_data = make_model() 39 | algo = Meta.parse(sumProductAlgorithm(x)) 40 | eval(algo) # compile the step! function 41 | data = Dict(:y => y_data) 42 | marginals = step!(data); 43 | post = marginals[:x] 44 | post_gauss = Distributions.MvNormal(mean(post), cov(post)) 45 | prior_gauss = Distributions.MvNormal([0.5, 0.5], 0.1*eye(2)) 46 | 47 | using Plots; pyplot() 48 | xrange = -1.5:0.1:1.5 49 | yrange = xrange 50 | contour(xrange, yrange, (x,y)->Distributions.pdf(prior_gauss,[x,y]), reuse=false) 51 | title!("Prior") 52 | 53 | contour(xrange, yrange, (x,y)->Distributions.pdf(post_gauss,[x,y]), reuse=false) 54 | title!("Posterior") 55 | 56 | #= 57 | 58 | import ForneyLab 59 | const FL = ForneyLab 60 | import ForneyLab.@RV 61 | import LinearAlgebra 62 | 63 | function make_model() 64 | g = FL.FactorGraph() 65 | 66 | Q = 0.1*FL.eye(2) 67 | mu_x = [0.5, 0.5] 68 | A = FL.eye(2) 69 | b = [0.0, 0.0] 70 | R = 0.1 .* [2 1; 1 1] 71 | y_data = A*mu_x + b + randn(2) # Should sample using noise ~ R 72 | 73 | @RV x ~ FL.GaussianMeanVariance(mu_x, Q) 74 | #@RV obs_noise ~ GaussianMeanVariance(zeros(nobs), R) 75 | #@RV y = A*x + b + obs_noise 76 | mu_y = A*x + b 77 | @RV y ~ FL.GaussianMeanVariance(mu_y, R) 78 | FL.placeholder(y, :y, dims=(nobs,)) # add clamping 79 | 80 | return x, y, y_data 81 | end 82 | 83 | x, y, y_data = make_model() 84 | 85 | algo = Meta.parse(FL.sumProductAlgorithm(x)) 86 | eval(algo) # compile the step! 
function 87 | data = Dict(:y => y_data) 88 | marginals = step!(data); 89 | post = marginals[:x] 90 | =# 91 | -------------------------------------------------------------------------------- /FactorGraphs/kalman_filter.jl: -------------------------------------------------------------------------------- 1 | 2 | 3 | #https://github.com/biaslab/ForneyLab.jl/blob/master/demo/1_state_estimation_forward_only.ipynb 4 | # Kalman filtering 5 | 6 | #Data 7 | Random.seed!(1) 8 | n_samples = 20 9 | x = [t for t=1:n_samples] # True state 10 | y = x + sqrt(200.0)*randn(n_samples); # Noisy observations of state 11 | 12 | using ForneyLab 13 | 14 | g = FactorGraph() 15 | 16 | # declare priors as random variables 17 | @RV m_x_t_min # m_x_t_min = Variable(id=:m_x_t_min) 18 | @RV v_x_t_min # v_x_t_min = Variable(id=:v_x_t_min) 19 | @RV x_t_min ~ GaussianMeanVariance(m_x_t_min, v_x_t_min) 20 | 21 | # System equations 22 | # u = 1.0; v = 200.0 23 | @RV n_t ~ GaussianMeanVariance(0.0, 200.0) 24 | @RV x_t = x_t_min + 1.0 25 | @RV y_t = x_t + n_t 26 | 27 | # Name variable for ease of lookup 28 | #x_t.id = :x_t; 29 | 30 | # Placeholders for prior 31 | placeholder(m_x_t_min, :m_x_t_min) # placeholder(:m_x_t_min) does not work 32 | placeholder(v_x_t_min, :v_x_t_min) 33 | 34 | # Placeholder for data 35 | placeholder(y_t, :y_t); 36 | 37 | #ForneyLab.draw(g) 38 | 39 | algo = sumProductAlgorithm(x_t) # Figure out a schedule and compile to Julia code 40 | println(algo) 41 | # Define algorithm 42 | eval(Meta.parse(algo)) 43 | 44 | # Define values for prior statistics 45 | m_x_0 = 0.0 46 | v_x_0 = 1000.0 47 | 48 | m_x = Vector{Float64}(undef, n_samples) 49 | v_x = Vector{Float64}(undef, n_samples) 50 | 51 | 52 | m_x_t = m_x_0 53 | v_x_t = v_x_0 54 | for t = 1:n_samples 55 | # To allow assignment to global variables (in REPL scope), 56 | # we need to declare them: 57 | # https://github.com/JuliaLang/julia/issues/28789 58 | global m_x_t, v_x_t 59 | # Prepare data and prior statistics 60 | data = Dict(:y_t => y[t], 61 | :m_x_t_min => m_x_t, 62 | :v_x_t_min => v_x_t) 63 | 64 | # Execute algorithm 65 | marginals = step!(data) 66 | 67 | # Extract posterior statistics 68 | m_x_t = mean(marginals[:x_t]) 69 | v_x_t = var(marginals[:x_t]) 70 | 71 | # Store to buffer 72 | m_x[t] = m_x_t 73 | v_x[t] = v_x_t 74 | end 75 | 76 | 77 | #http://docs.juliaplots.org/latest/examples/pyplot/ 78 | #ys = Vector[rand(10), rand(20)] 79 | #plot(ys, color=[:black :orange], line=(:dot, 4), marker=([:hex :d], 12, 0.8, Plots.stroke(3, :gray))) 80 | 81 | #using PyPlot 82 | using Plots; pyplot() 83 | 84 | xs = 1:n_samples 85 | scatter(xs, y, color=[:blue], label="y") 86 | plot!(xs, x, color=[:black], label="x") 87 | plot!(xs, m_x, color=[:red], label="estimated x") 88 | #grid("on") 89 | ys = m_x; l = sqrt.(v_x); u = sqrt.(v_x) 90 | #fill_between(xs, y.-l, y.+_u, color="b", alpha=0.3) 91 | plot!([ys ys], fillrange=[ys.-l ys.+u], fillalpha=0.3, c=:red) 92 | xlabel!("t") 93 | #legend!(loc="upper left"); 94 | fname = "Figures/forney-kalman-filter.png" 95 | savefig(fname) 96 | 97 | 98 | #== 99 | plot(collect(1:n_samples), y, "b*", label="y") 100 | plot(collect(1:n_samples), x, "k--", label="true x") 101 | 102 | plot(collect(1:n_samples), m_x, "b-", label="estimated x") 103 | fill_between(collect(1:n_samples), m_x-sqrt.(v_x), m_x+sqrt.(v_x), color="b", alpha=0.3); 104 | grid("on") 105 | xlabel("t") 106 | legend(loc="upper left"); 107 | 108 | PyPlot.plot(collect(1:n_samples), y, "b*", label="y") 109 | PyPlot.plot(collect(1:n_samples), x, "k--", label="true x") 110 
| PyPlot.plot(collect(1:n_samples), m_x, "b-", label="estimated x") 111 | PyPlot.fill_between(collect(1:n_samples), m_x-sqrt.(v_x), m_x+sqrt.(v_x), color="b", alpha=0.3); 112 | PyPlot.grid("on") 113 | PyPlot.xlabel("t") 114 | PyPlot.legend(loc="upper left"); 115 | =# 116 | -------------------------------------------------------------------------------- /FactorGraphs/kalman_filter_1d.jl: -------------------------------------------------------------------------------- 1 | 2 | 3 | #https://github.com/biaslab/ForneyLab.jl/blob/master/demo/1_state_estimation_forward_only.ipynb 4 | # Kalman filtering 5 | 6 | #Data 7 | Random.seed!(1) 8 | n_samples = 20 9 | x = [t for t=1:n_samples] # True state 10 | y = x + sqrt(200.0)*randn(n_samples); # Noisy observations of state 11 | 12 | using ForneyLab 13 | 14 | g = FactorGraph() 15 | 16 | # declare priors as random variables 17 | @RV m_x_t_min # m_x_t_min = Variable(id=:m_x_t_min) 18 | @RV v_x_t_min # v_x_t_min = Variable(id=:v_x_t_min) 19 | @RV x_t_min ~ GaussianMeanVariance(m_x_t_min, v_x_t_min) 20 | 21 | # System equations 22 | # u = 1.0; v = 200.0 23 | @RV n_t ~ GaussianMeanVariance(0.0, 200.0) 24 | @RV x_t = x_t_min + 1.0 25 | @RV y_t = x_t + n_t 26 | 27 | # Name variable for ease of lookup 28 | #x_t.id = :x_t; 29 | 30 | # Placeholders for prior 31 | placeholder(m_x_t_min, :m_x_t_min) # placeholder(:m_x_t_min) does not work 32 | placeholder(v_x_t_min, :v_x_t_min) 33 | 34 | # Placeholder for data 35 | placeholder(y_t, :y_t); 36 | 37 | #ForneyLab.draw(g) 38 | 39 | algo = sumProductAlgorithm(x_t) # Figure out a schedule and compile to Julia code 40 | println(algo) 41 | # Define algorithm 42 | eval(Meta.parse(algo)) 43 | 44 | # Define values for prior statistics 45 | m_x_0 = 0.0 46 | v_x_0 = 1000.0 47 | 48 | m_x = Vector{Float64}(undef, n_samples) 49 | v_x = Vector{Float64}(undef, n_samples) 50 | 51 | 52 | m_x_t = m_x_0 53 | v_x_t = v_x_0 54 | for t = 1:n_samples 55 | # To allow assignment to global variables (in REPL scope), 56 | # we need to declare them: 57 | # https://github.com/JuliaLang/julia/issues/28789 58 | global m_x_t, v_x_t 59 | # Prepare data and prior statistics 60 | data = Dict(:y_t => y[t], 61 | :m_x_t_min => m_x_t, 62 | :v_x_t_min => v_x_t) 63 | 64 | # Execute algorithm 65 | marginals = step!(data) 66 | 67 | # Extract posterior statistics 68 | m_x_t = mean(marginals[:x_t]) 69 | v_x_t = var(marginals[:x_t]) 70 | 71 | # Store to buffer 72 | m_x[t] = m_x_t 73 | v_x[t] = v_x_t 74 | end 75 | 76 | 77 | #http://docs.juliaplots.org/latest/examples/pyplot/ 78 | #ys = Vector[rand(10), rand(20)] 79 | #plot(ys, color=[:black :orange], line=(:dot, 4), marker=([:hex :d], 12, 0.8, Plots.stroke(3, :gray))) 80 | 81 | #using PyPlot 82 | using Plots; pyplot() 83 | 84 | xs = 1:n_samples 85 | scatter(xs, y, color=[:blue], label="y") 86 | plot!(xs, x, color=[:black], label="x") 87 | plot!(xs, m_x, color=[:red], label="estimated x") 88 | #grid("on") 89 | ys = m_x; l = sqrt.(v_x); u = sqrt.(v_x) 90 | #fill_between(xs, y.-l, y.+_u, color="b", alpha=0.3) 91 | plot!([ys ys], fillrange=[ys.-l ys.+u], fillalpha=0.3, c=:red) 92 | xlabel!("t") 93 | #legend!(loc="upper left"); 94 | fname = "Figures/forney-kalman-filter.png" 95 | savefig(fname) 96 | 97 | 98 | #== 99 | plot(collect(1:n_samples), y, "b*", label="y") 100 | plot(collect(1:n_samples), x, "k--", label="true x") 101 | 102 | plot(collect(1:n_samples), m_x, "b-", label="estimated x") 103 | fill_between(collect(1:n_samples), m_x-sqrt.(v_x), m_x+sqrt.(v_x), color="b", alpha=0.3); 104 | grid("on") 105 | xlabel("t") 
106 | legend(loc="upper left"); 107 | 108 | PyPlot.plot(collect(1:n_samples), y, "b*", label="y") 109 | PyPlot.plot(collect(1:n_samples), x, "k--", label="true x") 110 | PyPlot.plot(collect(1:n_samples), m_x, "b-", label="estimated x") 111 | PyPlot.fill_between(collect(1:n_samples), m_x-sqrt.(v_x), m_x+sqrt.(v_x), color="b", alpha=0.3); 112 | PyPlot.grid("on") 113 | PyPlot.xlabel("t") 114 | PyPlot.legend(loc="upper left"); 115 | =# 116 | -------------------------------------------------------------------------------- /Old/utils.jl: -------------------------------------------------------------------------------- 1 | 2 | using Test, LinearAlgebra, Flux 3 | 4 | #https://discourse.julialang.org/t/psa-replacement-of-ind2sub-sub2ind-in-julia-0-7/14666/20 5 | function lin2cart(shape, indices) 6 | #return Tuple.(CartesianIndices(shape)[indices]) 7 | return [Base._ind2sub(Tuple(shape), i) for i in indices] 8 | end 9 | 10 | function cart2lin(shape, indices) 11 | #return LinearIndices(shape)[CartesianIndex.(indices)] 12 | return [Base._sub2ind(Tuple(shape), i...) for i in indices] 13 | end 14 | 15 | function normalize_probdist(probdist) 16 | s = sum(probdist) 17 | return probdist ./ s 18 | end 19 | 20 | # Source: 21 | # https://github.com/QuantEcon/QuantEcon.lectures.code/blob/master/kalman/gaussian_contours.jl 22 | function bivariate_normal(X::Matrix, Y::Matrix, σ_x::Real=1.0, σ_y::Real=1.0, 23 | μ_x::Real=0.0, μ_y::Real=0.0, σ_xy::Real=0.0) 24 | Xμ = X .- μ_x 25 | Yμ = Y .- μ_y 26 | ρ = σ_xy/(σ_x*σ_y) 27 | z = Xμ.^2/σ_x^2 + Yμ.^2/σ_y^2 - 2*ρ.*Xμ.*Yμ/(σ_x*σ_y) 28 | denom = 2π*σ_x*σ_y*sqrt(1-ρ^2) 29 | return exp.(-z/(2*(1-ρ^2))) ./ denom 30 | end 31 | 32 | 33 | # https://github.com/probml/pmtk3/blob/master/matlabTools/graphics/gaussPlot2d.m 34 | function plot_gauss2d(m, C) 35 | U = eigvecs(C) 36 | D = eigvals(C) 37 | N = 100 38 | t = range(0, stop=2*pi, length=N) 39 | xy = zeros(Float64, 2, N) 40 | xy[1,:] = cos.(t) 41 | xy[2,:] = sin.(t) 42 | #k = sqrt(6) # approx sqrt(chi2inv(0.95, 2)) = 2.45 43 | k = 1.0 44 | w = (k * U * Diagonal(sqrt.(D))) * xy # 2*N 45 | #Plots.scatter!([m[1]], [m[2]], marker=:star, label="") 46 | handle = Plots.plot!(w[1,:] .+ m[1], w[2,:] .+ m[2], label="") 47 | return handle 48 | end 49 | 50 | function plot_gauss2d_test() 51 | m = [0.0, 0.0] 52 | #C = randn(2,2); C = C*C'; 53 | C = [1.0 0.0; 0.0 3.0]; 54 | @test isposdef(C) 55 | Plots.scatter([m[1]], [m[2]], marker=:star) 56 | plot_2dgauss(m, C) 57 | end 58 | 59 | #Compute scalar-valued function fn(x) and its derivative using Flux's 60 | #revere mode AD. 61 | function fun_and_grad(fn, x) 62 | # Based on https://github.com/tpapp/LogDensityProblems.jl/blob/master/src/AD_Flux.jl 63 | y, back = Flux.Tracker.forward(fn, x) 64 | yval = Flux.Tracker.data(y) 65 | g = first(Flux.Tracker.data.(back(1))) 66 | return yval, g 67 | end 68 | 69 | function fun_and_grad_test() 70 | f(x) = x[1]^2 + 100*x[2]^2 71 | g(x) = [2*x[1], 200*x[2]] 72 | x = [1,1] 73 | y1 = f(x) 74 | g1 = g(x) 75 | y2, g2 = fun_and_grad(f, x) 76 | @test isapprox(y1, y2) 77 | @test isapprox(g1, g2) 78 | end 79 | 80 | 81 | 82 | #http://julianlsolvers.github.io/Optim.jl/stable/#user/minimization/ 83 | # The minimum value of 0.0 is at (a,a^2). 
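# f(x) = (a - x[1])^2 + b*(x[2] - x[1]^2)^2, so the gradient implemented below is
#   df/dx1 = -2*(a - x[1]) - 4*b*(x[2] - x[1]^2)*x[1]
#   df/dx2 =  2*b*(x[2] - x[1]^2)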
84 | function rosenbrock(x; a=1.0, b=100.0) 85 | return (a - x[1])^2 + b * (x[2] - x[1]^2)^2 86 | end 87 | 88 | function rosenbrock_grad!(g, x; a=1.0, b=100.0) 89 | g[1] = -2.0 * (a-x[1]) -4*b*(x[2] - x[1]^2) * x[1] 90 | g[2] = 2.0 * b * (x[2] - x[1]^2) 91 | end 92 | 93 | function rosenbrock_grad(x; a=1.0, b=100.0) 94 | g = Vector{Float64}(undef, 2) 95 | rosenbrock_grad!(g, x, a=a, b=b) 96 | return g 97 | end 98 | 99 | function rosenbrock_hessian(x; a=1.0, b=100.0) 100 | H = zeros(2,2) 101 | H[1,1] = 2 + 8*b * x[2]; 102 | H[1,2] = -4 * b * x[1]; 103 | H[2,1] = H[1,2]; 104 | H[2,2] = 2 * b; 105 | return H 106 | end 107 | 108 | function rosenbrock_condition() 109 | H = rosenbrock_hessian([1,1]) 110 | println("condition number = $(cond(H))") 111 | evals = eigvals(H) 112 | @assert isapprox(cond(H), evals[2]/evals[1]) 113 | end 114 | 115 | function rosenbrock_grad_test() 116 | x = randn(2) 117 | y, g = fun_and_grad(rosenbrock, x) 118 | @test isapprox(y, rosenbrock(x)) 119 | @test isapprox(g, rosenbrock_grad(x)) 120 | g2 = Tracker.gradient(rosenbrock, x)[1] 121 | @test isapprox(g, g2) 122 | end 123 | -------------------------------------------------------------------------------- /Old/kalman_tracking_demo.jl: -------------------------------------------------------------------------------- 1 | 2 | # https://github.com/probml/pmtk3/blob/bb64a174c4ded561cfdba26d835e1aaa7221c368/demos/kalmanTrackingDemo.m 3 | 4 | 5 | using LinearAlgebra, Test 6 | import Distributions 7 | include("kalman_qe.jl") 8 | 9 | function make_params() 10 | F = [1 0 1 0; 0 1 0 1; 0 0 1 0; 0 0 0 1.0]; 11 | H = [1.0 0 0 0; 0 1 0 0]; 12 | nobs, nhidden = size(H) 13 | Q = Matrix(I, nhidden, nhidden) .* 0.001 14 | R = Matrix(I, nobs, nobs) .* 0.1 # 1.0 15 | mu0 = [8, 10, 1, 0.0]; 16 | V0 = Matrix(I, nhidden, nhidden) .* 1.0 17 | params = (mu0 = mu0, V0 = V0, F = F, H = H, Q = Q, R = R) 18 | return params 19 | end 20 | 21 | 22 | # Returns xs: H*T, ys: O*T 23 | function kalman_sample(kn::Kalman, T::Int) 24 | nobs, nhidden = size(kn.G) 25 | xs = Array{Float64}(undef, nhidden, T) 26 | ys = Array{Float64}(undef, nobs, T) 27 | mu0 = kn.cur_x_hat 28 | V0 = kn.cur_sigma 29 | prior_z = Distributions.MvNormal(mu0, V0) 30 | process_noise_dist = Distributions.MvNormal(kn.Q) 31 | obs_noise_dist = Distributions.MvNormal(kn.R) 32 | xs[:,1] = rand(prior_z) 33 | ys[:,1] = kn.G*zs[:,1] + rand(obs_noise_dist) 34 | for t=2:T 35 | xs[:,t] = kn.A*xs[:,t-1] + rand(process_noise_dist) 36 | ys[:,t] = kn.G*xs[:,t] + rand(obs_noise_dist) 37 | end 38 | return xs, ys 39 | end 40 | 41 | 42 | function kalman_filter(kn::Kalman, y::AbstractMatrix) 43 | obs_size, T = size(y) 44 | n = size(kn.G, 2) 45 | @assert n == kn.n 46 | x_filtered = Matrix{Float64}(undef, n, T) 47 | sigma_filtered = Array{Float64}(undef, n, n, T) 48 | sigma_forecast = Array{Float64}(undef, n, n, T) 49 | logL = 0 50 | for t in 1:T 51 | logL = logL + log_likelihood(kn, y[:, t]) 52 | prior_to_filtered!(kn, y[:, t]) 53 | x_filtered[:, t], sigma_filtered[:, :, t] = kn.cur_x_hat, kn.cur_sigma 54 | filtered_to_forecast!(kn) 55 | sigma_forecast[:, :, t] = kn.cur_sigma 56 | end 57 | return x_filtered, logL, sigma_filtered, sigma_forecast 58 | end 59 | 60 | function kalman_smoother(kn::Kalman, y::AbstractMatrix) 61 | T = size(y, 2) 62 | x_filtered, logL, sigma_filtered, sigma_forecast = kalman_filter(kn, y) 63 | x_smoothed = copy(x_filtered) 64 | sigma_smoothed = copy(sigma_filtered) 65 | for t in (T-1):-1:1 66 | x_smoothed[:, t], sigma_smoothed[:, :, t] = 67 | go_backward(kn, x_filtered[:, t], 
sigma_filtered[:, :, t], 68 | sigma_forecast[:, :, t], x_smoothed[:, t+1], 69 | sigma_smoothed[:, :, t+1]) 70 | end 71 | return x_smoothed, logL, sigma_smoothed 72 | end 73 | 74 | include("utils.jl") 75 | function do_plot(zs, ys, m, V) 76 | # m is H*T, V is H*H*T, where H=4 hidden states 77 | plt = scatter(ys[1,:], ys[2,:], label="observed", reuse=false) 78 | plt = scatter!(zs[1,:], zs[2,:], label="true", marker=:star) 79 | xlims!(minimum(ys[1,:])-1, maximum(ys[1,:])+1) 80 | ylims!(minimum(ys[2,:])-1, maximum(ys[2,:])+1) 81 | display(plt) 82 | m2 = m[1:2,:] 83 | V2 = V[1:2, 1:2, :] 84 | T = size(m2, 2) 85 | for t=1:T 86 | plt = plot_gauss2d(m2[:,t], V2[:,:,t]) 87 | end 88 | display(plt) 89 | end 90 | 91 | 92 | Random.seed!(2) 93 | T = 10 94 | params = make_params() 95 | F = params.F; H = params.H; Q = params.Q; R = params.R; mu0 = params.mu0; V0 = params.V0; 96 | kf = Kalman(F, H, Q, R) 97 | (zs, ys) = kalman_sample(kf, T) # H*T, O*T 98 | println("inference") 99 | set_state!(kf, mu0, V0) 100 | mF, loglik, VF = kalman_filter(kf, ys) 101 | set_state!(kf, mu0, V0) 102 | mS, loglik, VS = kalman_smoother(kf, ys) 103 | #m, V = kalman_smoother(params, ys) 104 | 105 | println("plotting") 106 | using Plots; pyplot() 107 | closeall() 108 | do_plot(zs, ys, mF, VF); title!("Filtering") 109 | do_plot(zs, ys, mS, VS); title!("Smoothing") 110 | -------------------------------------------------------------------------------- /Old/zigzag.jl: -------------------------------------------------------------------------------- 1 | # Reproduce fig 5.1 from A4O book to illustrate zigzag effect 2 | # of gradient descent with linesearch 3 | 4 | using Vec 5 | using LinearAlgebra 6 | 7 | include("utils.jl") 8 | include("first-order.jl") 9 | 10 | 11 | f(x) = rosenbrock(x, a=1, b=5) 12 | ∇f(x) = rosenbrock_grad(x, a=1, b=5) 13 | ftuple = (x,y) -> f([x,y]) 14 | xdomain = range(-2, stop=2, length=100) 15 | ydomain = range(-2, stop=2, length=100) 16 | x0 = [-1.0, -1.0] 17 | levels=[1,2,3,5,10,20,50,100] 18 | 19 | 20 | N = 50 21 | expts = Tuple{DescentMethod, String}[] 22 | push!(expts, (GradientDescentLineSearchExact(), "exact-linesearch")) 23 | push!(expts, (GradientDescentLineSearch(), "backtracking-linesearch")) 24 | push!(expts, (GradientDescent(0.01), "GD-0.01")) 25 | push!(expts, (GradientDescent(0.05), "GD-0.05")) 26 | push!(expts, (GradientDescent(0.055), "GD-0.055")) 27 | 28 | using Plots 29 | pyplot() 30 | for (M, name) in expts 31 | println("using method $name") 32 | plt = contour(xdomain, ydomain, (x,y)->f([x,y]), levels=levels, reuse=false) 33 | scatter!([x0[1]], [x0[2]], marker=:circle, markercolor=:black, 34 | markersize=5, label="start") 35 | scatter!([1.0], [1.0], marker=:star, markercolor=:red, 36 | markersize=5, label="opt") 37 | 38 | pts = [x0] 39 | ftrace = [f(x0)] 40 | function cb(M, fn, grad, x, iter) 41 | y = fn(x) 42 | g = grad(x) 43 | gnorm = norm(g) 44 | #println("$name, iter=$iter, f=$y, gnorm=$gnorm") 45 | if abs(y)>1e10 || abs(gnorm)>1e10 || abs(gnorm) < 1e-2; return true; end 46 | push!(pts, x) 47 | push!(ftrace, y) 48 | return false 49 | end 50 | xfinal = run_descent_method(M, f, ∇f, x0; max_iter=N, callback=cb) 51 | xtrace = hcat(pts...) 
# 2xN 52 | #println(ftrace) 53 | plot!(xtrace[1,:], xtrace[2,:], label=name, linecolor=:red, width=2) 54 | display(plt) 55 | savefig(plt, "Figures/zigzag-$name.pdf") 56 | #plt = plot(ftrace, label=name; yaxis=:log) 57 | #display(plt) 58 | end 59 | 60 | 61 | 62 | #= 63 | # Version using arrays 64 | fn(x) = rosenbrock(x, a=1, b=5) 65 | dfn(x) = rosenbrock_grad(x, a=1, b=5) 66 | xdomain = range(-2, stop=2, length=100) 67 | ydomain = range(-2, stop=2, length=100) 68 | x0 = [-1.0, -1.0] 69 | levels=[1,2,3,5,10,20,50,100] 70 | 71 | pts = [x0] 72 | for i in 1 : 10 73 | xcur = pts[end] 74 | g = dfn(xcur) 75 | d = -normalize(g) 76 | f1d = a -> begin 77 | xnew = xcur + a*d 78 | g2 = dfn(xnew) 79 | v1 = dot(g2, d) / norm(d) 80 | return v1 81 | end 82 | alpha = secant_method(f1d, 0.0, 1.0, 0.0001) 83 | push!(pts, xcur + alpha*d) 84 | end 85 | using Plots 86 | pyplot() 87 | plt = contour(xdomain, ydomain, (x,y) -> fn([x,y]), levels=levels, reuse=false) 88 | scatter!([x0[1]], [x0[2]], marker=:star, markercolor=:red, markersize=5) 89 | xtrace = hcat(pts...) # 2xN 90 | plot!(xtrace[1,:], xtrace[2,:]) 91 | display(plt) 92 | =# 93 | 94 | 95 | #= 96 | # Version using Vec (tuples) 97 | # rosenbrock(a=1, b=5) 98 | f = (x,y) -> (1-x)^2 + 5*(y - x^2)^2 99 | df = (x,y) -> [2*(10*x^3-10*x*y+x-1), 10*(y-x^2)] 100 | xdomain = range(-2, stop=2, length=100) 101 | ydomain = range(-2, stop=2, length=100) 102 | p0 = (-1, -1) 103 | 104 | pts2d = Tuple{Float64,Float64}[p0] 105 | for i in 1 : 10 106 | x,y = pts2d[end] 107 | dp = normalize(-VecE2{Float64}(df(x, y)...)) 108 | println("dp $dp") 109 | f1d = a -> begin 110 | x2 = x + a*dp.x 111 | y2 = y + a*dp.y 112 | 113 | da = df(x2, y2) 114 | pa = VecE2{Float64}(da[1], da[2]) 115 | 116 | pp = proj(pa, dp, Float64) 117 | println("da=$da, pp=$pp") 118 | return pp 119 | end 120 | alpha = secant_method(f1d, 0.0, 1.0, 0.0001) 121 | push!(pts2d, (x + alpha*dp.x, y + alpha*dp.y)) 122 | end 123 | using Plots 124 | pyplot() 125 | plt = Plots.contour(xdomain, ydomain, f, levels=[1,2,3,5,10,20,50,100], reuse=false) 126 | Plots.plot!([p[1] for p in pts2d], [p[2] for p in pts2d], label="GD with line search") 127 | display(plt) 128 | =# 129 | -------------------------------------------------------------------------------- /FactorGraphs/lin_probit_regression_corrupted_classlabels.jl: -------------------------------------------------------------------------------- 1 | # Tested with Julia v1.1.0 and ForneyLab v0.9.1 2 | 3 | using LinearAlgebra, PyPlot, ForneyLab 4 | import Distributions: MvNormal 5 | 6 | # Custom message update rule to interpret integers as Categorical Distributions 7 | # with one-hot coding. 
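# Concretely, the rule defined below takes an observed label y in {1, 2}, encodes it
# as a one-hot vector p, pushes it through the label-flipping matrix (a = A'p), and
# returns the normalized result as a Categorical message over the uncorrupted label.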
8 | @sumProductRule(:node_type => Transition, 9 | :outbound_type => Message{Categorical}, 10 | :inbound_types => (Message{PointMass}, Nothing, Message{PointMass}), 11 | :name => SPTransitionIn1PVP) 12 | 13 | function ruleSPTransitionIn1PVP(msg_out::Message{PointMass, Univariate}, 14 | msg_in1::Nothing, 15 | msg_a::Message{PointMass, MatrixVariate}) 16 | # Convert integer in {1,2} to Categorical with probability vector p 17 | p = zeros(2) 18 | p[msg_out.dist.params[:m]] = 1.0 19 | a = msg_a.dist.params[:m]'*p 20 | 21 | Message(Univariate, Categorical, p=a./sum(a)) 22 | end 23 | 24 | # EP algorithm building 25 | function build_algorithm(dim::Int, N::Int64, alpha::Float64; output_file="ep_algo.jl") 26 | fg = FactorGraph() 27 | A = [1.0-alpha alpha; alpha 1.0-alpha] # label flipping probability matrix 28 | @RV w ~ GaussianMeanVariance(constant(zeros(dim)), constant(100 * eye(dim)), id=:w) 29 | sites = Variable[w] 30 | for t=1:N 31 | x_t = placeholder(Variable(id=:x_*t), :x, index=t, dims=(dim,)) 32 | @RV dp_t ~ DotProduct(w, x_t, id=:dp_*t) 33 | @RV z_t ~ Sigmoid(dp_t, id=:z_*t) # uncorrupted label 34 | @RV y_t ~ Transition(z_t, constant(A, id=:A_*t), id=:y_*t) # corrupted label 35 | placeholder(y_t, :y, index=t, datatype=Int) 36 | push!(sites, dp_t) 37 | end 38 | 39 | # ForneyLab.draw(fg) 40 | ep_schedule = expectationPropagationSchedule(sites) 41 | messagePassingAlgorithm(ep_schedule, marginalSchedule(w), file=output_file) 42 | end 43 | 44 | # Synthetic data generation 45 | function generate_data(N::Int, α::Float64) 46 | X = zeros(3, N) 47 | y = Vector{Int64}(undef, N) 48 | dists = [MvNormal([-1.0; -0.75], 0.5*eye(2)); MvNormal([1.25; 1.0], 0.3*diagm(0 => [1.0; 1.5]))] 49 | for i=1:N 50 | y[i] = rand([1;2]) 51 | X[1:2,i] = rand(dists[y[i]]) 52 | X[3,i] = 1.0 53 | if rand() < α # Flip class label 54 | y[i] = (y[i] == 1) ? 
2 : 1 55 | end 56 | end 57 | 58 | return X, y 59 | end 60 | 61 | # Plotting 62 | function plot_state(X, y, β_dist=false) 63 | figure() 64 | scatter(X[1,:], X[2,:], c=float(y).-1.0) 65 | # PyPlot.gray() 66 | 67 | if isa(β_dist, ProbabilityDistribution) 68 | # σ(β[1]*x[1] + β[2]*x[2] + β[3]) = 0.5 69 | # ⇛ β[1]*x[1] + β[2]*x[2] + β[3] = 0 70 | # ⇛ x[2] = - β[1]/β[2]*x[1] - β[3]/β[2] 71 | # = a * x[1] + b 72 | x1 = [minimum(X[1,:]); maximum(X[1,:])] 73 | for i=1:100 74 | β = ForneyLab.sample(β_dist) 75 | a,b = [-1*β[1]; -1*β[3]] ./ β[2] 76 | plot(x1, a*x1.+b, "-", color="k", alpha=0.2) 77 | end 78 | 79 | xlim(x1) 80 | ylim([minimum(X[2,:]); maximum(X[2,:])]) 81 | PyPlot.xticks([]) 82 | PyPlot.yticks([]) 83 | end 84 | # savefig("../figures/lin_class_posterior.eps") 85 | end 86 | 87 | # Generate training data 88 | N = 100 89 | α = 0.2 # Label corruption probability 90 | # Random.seed!(1234) 91 | X_train, y_train = generate_data(N, α) 92 | # plot_state(X_train, y_train) 93 | 94 | # Build inference algorithm 95 | # Comment the following line to skip regeneration of the algorithm 96 | build_algorithm(3, N, α, output_file="ep_algo.jl") 97 | 98 | data = Dict( 99 | :x => [X_train[:,t] for t=1:N], 100 | :y => y_train) 101 | 102 | # Perform inference 103 | include("ep_algo.jl") 104 | messages = init() 105 | marginals = Dict() 106 | for t=1:20 107 | println("Step $t") 108 | step!(data, marginals, messages) 109 | end 110 | println("DONE!") 111 | println(marginals) 112 | 113 | # Plot result 114 | plot_state(X_train, y_train, marginals[:w]) 115 | savefig("lin_class_posterior_robust.pdf") 116 | PyPlot.show() 117 | -------------------------------------------------------------------------------- /FactorGraphs/vb_gmm.jl: -------------------------------------------------------------------------------- 1 | 2 | # Variational Bayes for a 1d GMM 3 | 4 | #https://github.com/biaslab/ForneyLab.jl/blob/master/demo/4_variational_estimation_gaussian_mixture.ipynb 5 | 6 | # Generate toy data set 7 | 8 | 9 | import Distributions: Normal, pdf 10 | using Random 11 | 12 | Random.seed!(238) 13 | n = 50 14 | y_data = Float64[] 15 | pi_data = 0.3 16 | d1 = Normal(-2., 0.75) 17 | d2 = Normal(1.0, 0.75) 18 | z_data = rand(n) .< pi_data 19 | for i = 1:n 20 | push!(y_data, z_data[i] ? 
rand(d1) : rand(d2)) 21 | end 22 | 23 | using ForneyLab 24 | 25 | g = FactorGraph() 26 | 27 | # Specify generative model 28 | @RV _pi ~ Beta(1.0, 1.0) 29 | @RV m_1 ~ GaussianMeanVariance(0.0, 100.0) 30 | @RV w_1 ~ Gamma(0.01, 0.01) 31 | @RV m_2 ~ GaussianMeanVariance(0.0, 100.0) 32 | @RV w_2 ~ Gamma(0.01, 0.01) 33 | 34 | z = Vector{Variable}(undef, n) 35 | y = Vector{Variable}(undef, n) 36 | for i = 1:n 37 | @RV z[i] ~ Bernoulli(_pi) 38 | @RV y[i] ~ GaussianMixture(z[i], m_1, w_1, m_2, w_2) 39 | 40 | placeholder(y[i], :y, index=i) 41 | end 42 | 43 | # Specify recognition factorization (mean-field) 44 | q = RecognitionFactorization(_pi, m_1, w_1, m_2, w_2, z, ids=[:PI, :M1, :W1, :M2, :W2, :Z]) 45 | 46 | # Generate the algorithm 47 | println("generating codewords") 48 | algo = variationalAlgorithm(q) 49 | algo_F = freeEnergyAlgorithm(q); 50 | 51 | # Load algorithms 52 | println("compiling code") 53 | eval(Meta.parse(algo)) 54 | eval(Meta.parse(algo_F)); 55 | 56 | data = Dict(:y => y_data) 57 | 58 | # Prepare recognition distributions 59 | marginals = Dict(:_pi => vague(Beta), 60 | :m_1 => ProbabilityDistribution(Univariate, GaussianMeanVariance, m=-1.0, v=1e4), 61 | :w_1 => vague(Gamma), 62 | :m_2 => ProbabilityDistribution(Univariate, GaussianMeanVariance, m=1.0, v=1e4), 63 | :w_2 => vague(Gamma)) 64 | for i = 1:n 65 | marginals[:z_*i] = vague(Bernoulli) 66 | end 67 | 68 | # Execute algorithm 69 | n_its = 10 70 | F = Float64[] 71 | for i = 1:n_its 72 | println("inference step $i") 73 | stepZ!(data, marginals) 74 | stepPI!(data, marginals) 75 | stepM1!(data, marginals) 76 | stepW1!(data, marginals) 77 | stepM2!(data, marginals) 78 | stepW2!(data, marginals) 79 | 80 | # Store variational free energy for visualization 81 | push!(F, freeEnergy(data, marginals)) 82 | end 83 | 84 | using PyPlot 85 | 86 | # Plot free energy to check for convergence 87 | figure(figsize=(8,4)) 88 | subplot(211) 89 | plot(1:n_its, F, color="black") 90 | grid("on") 91 | xlabel("VMP iteration") 92 | ylabel("Variational free energy") 93 | gcf() 94 | 95 | # Plot data 96 | subplot(212) 97 | scatter(y_data[z_data], -0.25*ones(sum(z_data)), color="blue", linewidth=2, marker=".") 98 | scatter(y_data[.!z_data], -0.25*ones(sum(.!z_data)), color="orange", linewidth=2, marker=".") 99 | 100 | # Plot estimated distribution 101 | x_test = range(-4, stop=4, length=200) 102 | d1_est = Normal(mean(marginals[:m_1]), sqrt(var(marginals[:m_1]))) 103 | d2_est = Normal(mean(marginals[:m_2]), sqrt(var(marginals[:m_2]))) 104 | pi_est = mean(marginals[:_pi]) 105 | gmm_pdf = pi_est * pdf.(Ref(d1_est), x_test) + (1-pi_est) * pdf.(Ref(d2_est), x_test) 106 | plot(x_test, gmm_pdf, color="k") 107 | xlabel(L"y") 108 | ylabel(L"p(y|\mathcal{D})") 109 | xlim([-4,4]) 110 | grid() 111 | tight_layout() 112 | gcf() 113 | 114 | 115 | #== 116 | using Plots; pyplot() 117 | 118 | # Plot free energy to check for convergence 119 | plot(1:n_its, F, c=:black, lab="Free energy") 120 | xlabel!("Iteration") 121 | ylabel!("F") 122 | 123 | # Plot data, superimpose fitted models 124 | scatter(y_data[z_data], -0.25*ones(sum(z_data)), color=:blue, markersize=6) 125 | scatter!(y_data[.!z_data], -0.25*ones(sum(.!z_data)), color=:orange, markersize=6) 126 | x_test = range(-4, stop=4, length=200) 127 | d1_est = Normal(mean(marginals[:m_1]), sqrt(var(marginals[:m_1]))) 128 | d2_est = Normal(mean(marginals[:m_2]), sqrt(var(marginals[:m_2]))) 129 | pi_est = mean(marginals[:_pi]) 130 | gmm_pdf = pi_est * pdf.(Ref(d1_est), x_test) + (1-pi_est) * pdf.(Ref(d2_est), x_test) 131 | 
plot!(x_test, gmm_pdf, color=:black) 132 | xlabel!(L"y") 133 | str = "\$p(y|\\mathcal{D})\$"; ylabel!(str) 134 | xlims!((-4,4)) 135 | grid!() 136 | ==# 137 | -------------------------------------------------------------------------------- /FactorGraphs/gauss-bayes-rule-2d.jl: -------------------------------------------------------------------------------- 1 | # Bayes rule for a linear Gaussian model 2 | # X -> Y, observe Y, infer X 3 | # Y ~ N(A x + b, Sigma_y) 4 | # X ~ N(mu_x, Sigma_x) 5 | 6 | # We consider the example from "Machine learning: a probabilistic perspective" 7 | # (Murphy, 2012) sec 4.4.2.2. 8 | # where A=I, b=0, mu=[0.5 0.5], Sigma_x=0.1I, Sigma_y=0.1*[2 1; 1 1] 9 | # Corresponding Matlab code is here: 10 | # https://github.com/probml/pmtk3/blob/master/demos/gaussInferParamsMean2d.m 11 | # Figures should look like this 12 | # http://people.cs.ubc.ca/~murphyk/MLbook/figReport-16-Aug-2012/pdfFigures/gauss2dupdatePostSubplot.pdf 13 | 14 | using ForneyLab 15 | using LinearAlgebra, Distributions, Test 16 | 17 | function make_data(params, n_data) 18 | y_mean = params.A * params.true_x + params.b 19 | y_dist = Distributions.MvNormal(y_mean, params.Sigma_y) 20 | y_data = rand(y_dist, n_data) # n_data x obs_dim 21 | return y_data 22 | end 23 | 24 | function make_factor_graph(params) 25 | g = FactorGraph() 26 | @RV x ~ GaussianMeanVariance(params.mu_x, params.Sigma_x) 27 | mu_y = params.A * x + params.b 28 | @RV y ~ GaussianMeanVariance(mu_y, params.Sigma_y) 29 | placeholder(y, :y, dims=(length(params.b),)) 30 | fg = (x = x, y = y) 31 | return fg 32 | end 33 | 34 | function make_fg_inference(fg) 35 | algo = Meta.parse(sumProductAlgorithm(fg.x)) 36 | eval(algo) # compile the step! function 37 | function infer(y_data) 38 | data = Dict(fg.y.id => y_data) 39 | marginals = step!(data); 40 | post = marginals[:x] 41 | post_gauss = Distributions.MvNormal(mean(post), cov(post)) 42 | return post_gauss 43 | end 44 | return infer 45 | end 46 | 47 | function bayes_rule_lin_gauss(params, y) 48 | # Implements eqn 4.125 of MLAPP 49 | A = params.A 50 | At = permutedims(A) 51 | b = params.b 52 | prior_prec = inv(params.Sigma_x) 53 | prior_mean = params.mu_x 54 | obs_prec = inv(params.Sigma_y) 55 | post_prec = prior_prec + At * obs_prec * A 56 | post_cov = inv(post_prec) 57 | post_mean = post_cov*(At * obs_prec * (y-b) + prior_prec * prior_mean) 58 | post_gauss = Distributions.MvNormal(post_mean, post_cov) 59 | return post_gauss 60 | end 61 | 62 | #= 63 | params = Dict( 64 | :true_x => [0.5, 0.5], 65 | :mu_x => [0.0, 0.0], 66 | :Sigma_x => 0.1 * eye(2), 67 | :A => eye(2), 68 | :b => [0.0, 0.0], 69 | :Sigma_y => 0.1 .* [2 1; 1 1] 70 | ) 71 | =# 72 | 73 | params_gen = 74 | (true_x = [0.5, 0.5], 75 | mu_x = [0.0, 0.0], 76 | Sigma_x = 0.1 * eye(2), 77 | A = eye(2), 78 | b = [0.0, 0.0], 79 | Sigma_y = 0.1 .* [2 1; 1 1] 80 | ) 81 | 82 | n_data = 10 83 | Random.seed!(1) 84 | y_data_all = make_data(params_gen, n_data) 85 | y_data_mean = vec(mean(y_data_all, dims=2)) 86 | 87 | params = 88 | (true_x = [0.5, 0.5], 89 | mu_x = [0.0, 0.0], 90 | Sigma_x = 0.1 * eye(2), 91 | A = eye(2), 92 | b = [0.0, 0.0], 93 | Sigma_y = 0.1 .* [2 1; 1 1] / n_data # since using average of observations 94 | ) 95 | fg = make_factor_graph(params) 96 | infer = make_fg_inference(fg) 97 | post_gauss = infer(y_data_mean) 98 | post_gauss2 = bayes_rule_lin_gauss(params, y_data_mean) 99 | @test isapprox(mean(post_gauss), mean(post_gauss2)) 100 | @test isapprox(cov(post_gauss), cov(post_gauss2)) 101 | 102 | using Plots; pyplot() 103 | 
closeall() 104 | xrange = -1.5:0.01:1.5 105 | yrange = xrange 106 | plt = scatter(y_data_all[1,:], y_data_all[2,:], label="obs", reuse=false) 107 | scatter!([params.true_x[1]], [params.true_x[2]], label="truth") 108 | xlims!(minimum(xrange), maximum(xrange)) 109 | ylims!(minimum(xrange), maximum(xrange)) 110 | title!("Data") 111 | savefig("Figures/gauss-bayes-rule-2d-data.png") 112 | display(plt) 113 | 114 | prior_gauss = Distributions.MvNormal(params.mu_x, params.Sigma_x) 115 | plt = contour(xrange, yrange, (x,y)->Distributions.pdf(prior_gauss,[x,y]), 116 | reuse=false, title="prior") 117 | savefig("Figures/gauss-bayes-rule-2d-prior.png") 118 | display(plt) 119 | 120 | plt = contour(xrange, yrange, (x,y)->Distributions.pdf(post_gauss,[x,y]), 121 | reuse=false, title = "Posterior") 122 | savefig("Figures/gauss-bayes-rule-2d-post.png") 123 | display(plt) 124 | -------------------------------------------------------------------------------- /Old/flux-demo.jl: -------------------------------------------------------------------------------- 1 | #https://fluxml.ai/Flux.jl/stable/models/basics/ 2 | using Flux.Tracker 3 | using Flux 4 | 5 | f(x) = 3x^2 + 2x + 1 6 | 7 | # df/dx = 6x + 2 8 | df(x) = Tracker.gradient(f, x)[1] 9 | 10 | df(2) # 14.0 (tracked) 11 | 12 | # d²f/dx² = 6 13 | d2f(x) = Tracker.gradient(df, x; nest=true)[1] 14 | 15 | # This fails: known bug 16 | # https://github.com/FluxML/Flux.jl/issues/566 17 | #d2f(2) # 6.0 (tracked) 18 | 19 | f(W, b, x) = W * x + b 20 | 21 | Tracker.gradient(f, 2, 3, 4) 22 | #(4.0 (tracked), 1.0, 2.0 (tracked)) 23 | 24 | W = param(2) # 2.0 (tracked) 25 | b = param(3) # 3.0 (tracked) 26 | 27 | f(x) = W * x + b 28 | 29 | params = Params([W, b]) 30 | grads = Tracker.gradient(() -> f(4), params) 31 | 32 | grads[W] # 4.0 df/dW = x = 4 33 | grads[b] # 1.0 df/db = 1 34 | 35 | 36 | f2(W, b, x) = W * x + b 37 | W = param(2) 38 | b = param(3) 39 | x = param(4) 40 | params2 = Params([W, b, x]) 41 | grads2 = Tracker.gradient(() -> f2, params2) 42 | 43 | 44 | ### 45 | Random.seed!(1234) 46 | W = rand(2, 5) 47 | b = rand(2) 48 | 49 | predict(x) = W*x .+ b 50 | 51 | function loss(x, y) 52 | ŷ = predict(x) 53 | sum((y .- ŷ).^2) 54 | end 55 | 56 | x, y = rand(5), rand(2) # Dummy data 57 | loss(x, y) # ~ 2.8 58 | 59 | 60 | W = param(W) 61 | b = param(b) 62 | 63 | gs = Tracker.gradient(() -> loss(x, y), Params([W, b])) 64 | 65 | using Flux.Tracker: update! 
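# The next few lines perform one hand-rolled gradient-descent step,
# W <- W - 0.1 * dloss/dW, using the gradient stored in gs[W].
# Repeating that step gives a minimal training loop; a sketch, assuming the
# same loss, x, y and tracked parameters W, b defined above:
#
#   for step = 1:100
#       gs = Tracker.gradient(() -> loss(x, y), Params([W, b]))
#       update!(W, -0.1 * gs[W])
#       update!(b, -0.1 * gs[b])
#   end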
66 | 67 | Δ = gs[W] 68 | 69 | # Update the parameter and reset the gradient 70 | update!(W, -0.1Δ) 71 | 72 | loss(x, y) # ~ 1.4 73 | 74 | function linear(in, out) 75 | W = param(randn(out, in)) 76 | b = param(randn(out)) 77 | x -> W * x .+ b 78 | end 79 | 80 | linear1 = linear(5, 3) # we can access linear1.W etc 81 | linear2 = linear(3, 2) 82 | 83 | model(x) = linear2(σ.(linear1(x))) 84 | 85 | model(rand(5)) # => 2-element vector 86 | 87 | ############ 88 | 89 | nin = 5; nhid = 2; 90 | W = param(rand(nhid, nin)) 91 | b = param(rand(nhid)) 92 | f(x, W, b) = sum(W*x .+ b) 93 | x = rand(nin) 94 | f(x, W, b) 95 | g = Tracker.gradient(() -> f(x,W,b), Params([x,W,b])) 96 | 97 | 98 | nin = 3 99 | wp = param([-1,0,1]) 100 | bp = param(0.0) 101 | f(x, w, b) = sum( w .*x + b) 102 | xin = 1:3 103 | f(xin, wp, bp) 104 | g = Tracker.gradient(() -> f(xin, wp, bp), Params([wp,bp])) 105 | 106 | ### 107 | 108 | #https://stackoverflow.com/questions/49135107/logistic-regression-using-flux-jl 109 | 110 | 111 | using GLM, DataFrames, Flux.Tracker 112 | 113 | Random.seed!(1) 114 | n = 100 115 | df = DataFrame(s1=rand(n), s2=rand(n)) 116 | df[:y] = rand(n) .< 1 ./ (1 .+ exp.(-(1 .+ 2 .* df[1] .+ 0.5 .* df[2]))) 117 | model2 = glm(@formula(y~s1+s2), df, Binomial(), LogitLink()) 118 | 119 | x = Matrix(df[1:2]) 120 | y = df[3] 121 | N=10; x = rand(N,2); y = rand([false,true], N) 122 | W = param(rand(2,1)) 123 | b = param(rand(1)) 124 | predict(x) = 1.0 ./ (1.0 .+ exp.(-x*W .- b)) 125 | loss(x,y) = -sum(log.(predict(x[y,:]))) - sum(log.(1 .- predict(x[.!y,:]))) 126 | 127 | function update!(ps, η = .01) 128 | for w in ps 129 | w.data .-= w.grad .* η 130 | w.grad .= 0 131 | end 132 | end 133 | 134 | maxiter = 10 135 | for iter=1:maxiter 136 | back!(loss(x,y)) 137 | if max(maximum(abs.(W.grad)), abs(b.grad[1])) < 0.01 138 | break 139 | end 140 | update!((W, b)) 141 | end 142 | 143 | ################ 144 | # Least squares regression 145 | 146 | N = 10; D = 2; 147 | Random.seed!(1) 148 | X = rand(N, D) 149 | X1 = [ones(N) X] 150 | X1t = transpose(X1) 151 | wtrue = rand(D+1) 152 | yobs = X1 * wtrue 153 | loss(w) = sum((X1 * w - yobs) .^2) 154 | 155 | # OLS solution 156 | # http://web.stanford.edu/class/ee103/julia_slides/julia_least_squares_slides.pdf 157 | west1 = inv(X1t * X1) * X1t * yobs 158 | west2 = X1 \ yobs 159 | #using LinearAlgebra 160 | #F = qr(X1) 161 | #@test isapprox(X1, F.Q * F.R) 162 | #west3 = inv(F.R)*(F.Q'*yobs) 163 | 164 | # Gradient method 165 | wp = param(rand(D+1)) 166 | gradloss(wp) = (back!(loss(wp)); wp.grad) 167 | dloss(w) = 2*(X1t * X1 * w - X1t * ytrue) 168 | @test isapprox(gradloss(wp), dloss(wp), 1e-2) 169 | 170 | 171 | using Optim 172 | x0 = zeros(2) 173 | result1 = optimize(rosenbrock, x0, LBFGS(m=5)) 174 | sol1 = Optim.minimizer(result1) 175 | ncalls1 = result1.iterations 176 | 177 | result2 = optimize(rosenbrock, x0, LBFGS(m=5); autodiff = :forward) 178 | sol2 = Optim.minimizer(result2) 179 | ncalls2 = result2.iterations 180 | 181 | result3 = optimize(rosenbrock, rosenbrock_grad!, x0, LBFGS(m=5)) 182 | sol3 = Optim.minimizer(result3) 183 | ncalls3 = result3.iterations 184 | -------------------------------------------------------------------------------- /FactorGraphs/structured_mean_field.jl: -------------------------------------------------------------------------------- 1 | 2 | # Mean field vs structured mean field for a 1d Gaussian SSM with unknown process noise variance 3 | #https://github.com/biaslab/ForneyLab.jl/blob/master/demo/7_structured_variational_estimation.ipynb 4 | 5 | 
Random.seed!(1) 6 | n_samples = 100 7 | v_data = 1.0 8 | w_data = 10.0 9 | s_0_data = 0.0 10 | 11 | s_data = Vector{Float64}(undef, n_samples) 12 | x_data = Vector{Float64}(undef, n_samples) 13 | s_t_min_data = s_0_data 14 | for t = 1:n_samples 15 | global s_t_min_data 16 | s_data[t] = s_t_min_data + sqrt(1/w_data)*randn() # Evolution model 17 | x_data[t] = s_data[t] + sqrt(v_data)*randn() # Observation model 18 | 19 | s_t_min_data = s_data[t] 20 | end 21 | ; 22 | 23 | using ForneyLab 24 | 25 | g = FactorGraph() 26 | 27 | @RV w ~ Gamma(0.01, 0.01) 28 | @RV s_0 ~ GaussianMeanVariance(0.0, 100.0) 29 | 30 | s = Vector{Variable}(undef, n_samples) 31 | x = Vector{Variable}(undef, n_samples) 32 | s_t_min = s_0 33 | for t = 1:n_samples 34 | global s_t_min 35 | @RV s[t] ~ GaussianMeanPrecision(s_t_min, w) 36 | @RV x[t] ~ GaussianMeanVariance(s[t], v_data) 37 | 38 | s_t_min = s[t] 39 | 40 | placeholder(x[t], :x, index=t) 41 | end 42 | ; 43 | 44 | 45 | # Define recognition factorization 46 | RecognitionFactorization() 47 | 48 | q_w = RecognitionFactor(w) 49 | q_s_0 = RecognitionFactor(s_0) 50 | 51 | q_s = Vector{RecognitionFactor}(undef, n_samples) 52 | for t=1:n_samples 53 | q_s[t] = RecognitionFactor(s[t]) 54 | end 55 | 56 | println("generate code") 57 | algo_w_mf = variationalAlgorithm(q_w, name="WMF") 58 | algo_s_mf = variationalAlgorithm([q_s_0; q_s], name="SMF") 59 | algo_F_mf = freeEnergyAlgorithm(name="MF"); 60 | 61 | 62 | println("compilinig") 63 | eval(Meta.parse(algo_w_mf)) 64 | eval(Meta.parse(algo_s_mf)) 65 | eval(Meta.parse(algo_F_mf)) 66 | 67 | # Initialize data 68 | data = Dict(:x => x_data) 69 | n_its = 40 70 | 71 | # Initial recognition distributions 72 | marginals_mf = Dict{Symbol, ProbabilityDistribution}(:w => vague(Gamma)) 73 | for t = 0:n_samples 74 | marginals_mf[:s_*t] = vague(GaussianMeanPrecision) 75 | end 76 | 77 | # Run algorithm 78 | F_mf = Vector{Float64}(undef, n_its) 79 | for i = 1:n_its 80 | println("iteration $i") 81 | stepWMF!(data, marginals_mf) 82 | stepSMF!(data, marginals_mf) 83 | 84 | F_mf[i] = freeEnergyMF(data, marginals_mf) 85 | end 86 | ; 87 | 88 | # Define the recognition factorization 89 | q_struct = RecognitionFactorization(w, [s_0; s]; ids=[:WStruct, :SStruct]) 90 | 91 | println("generate code") 92 | algo_struct = variationalAlgorithm(q_struct) 93 | algo_F_struct = freeEnergyAlgorithm(name="Struct") 94 | 95 | 96 | println("compile code") 97 | eval(Meta.parse(algo_struct)) 98 | eval(Meta.parse(algo_F_struct)) 99 | 100 | # Initial recognition distributions 101 | marginals_struct = Dict{Symbol, ProbabilityDistribution}(:w => vague(Gamma)) 102 | 103 | # Run algorithm 104 | F_struct = Vector{Float64}(undef, n_its) 105 | for i = 1:n_its 106 | println("iteration $i") 107 | stepSStruct!(data, marginals_struct) 108 | stepWStruct!(data, marginals_struct) 109 | 110 | F_struct[i] = freeEnergyStruct(data, marginals_struct) 111 | end 112 | ; 113 | 114 | using PyPlot 115 | 116 | # Collect state estimates 117 | m_s_mf = [mean(marginals_mf[:s_*t]) for t=0:n_samples] 118 | v_s_mf = [var(marginals_mf[:s_*t]) for t=0:n_samples] 119 | m_s_struct = [mean(marginals_struct[:s_*t]) for t=0:n_samples] 120 | v_s_struct = [var(marginals_struct[:s_*t]) for t=0:n_samples] 121 | 122 | # Plot estimated state 123 | figure(figsize=(8,4)) 124 | #subplot(211) 125 | plot(collect(1:n_samples), x_data, "*", label="Data", color="black") 126 | plot(collect(0:n_samples), [0.0; s_data], "k-", label="True hidden state") 127 | plot(collect(0:n_samples), m_s_struct, "b-", label="Structured state 
estimate") 128 | fill_between(collect(0:n_samples), m_s_struct-sqrt.(v_s_struct), m_s_struct+sqrt.(v_s_struct), color="b", alpha=0.3); 129 | plot(collect(0:n_samples), m_s_mf, "r-", label="Mean-field state estimate") 130 | fill_between(collect(0:n_samples), m_s_mf-sqrt.(v_s_mf), m_s_mf+sqrt.(v_s_mf), color="r", alpha=0.3); 131 | grid("on") 132 | xlabel("Time") 133 | legend(loc="upper left", bbox_to_anchor=[-0.05, 1.5]); 134 | gcf() 135 | 136 | #subplot(212) 137 | clf() 138 | plot(1:n_its, F_struct, color="blue", label="Structured") 139 | plot(1:n_its, F_mf, color="red", label="Mean-field") 140 | grid("on") 141 | legend() 142 | yscale("log") 143 | xlabel("VMP iteration") 144 | ylabel("Variational free energy") 145 | tight_layout(); 146 | gcf() 147 | -------------------------------------------------------------------------------- /Old/logreg-opt-demo.jl: -------------------------------------------------------------------------------- 1 | # Examples of how to fit a logistic regression model 2 | 3 | #https://fluxml.ai/Flux.jl/v0.1.1/examples/logreg.html 4 | #http://marubon-ds.blogspot.com/2018/05/deep-learning-with-julia-introduction.html 5 | 6 | #https://github.com/FluxML/Flux.jl/blob/1cf37ab9eb806368d5702ab8053d5bc5b46180cf/src/layers/stateless.jl 7 | 8 | #https://github.com/FluxML/NNlib.jl/blob/master/src/logsoftmax.jl 9 | 10 | #https://www.linkedin.com/pulse/creating-deep-neural-network-model-learn-handwritten-digits-mike-gold/ 11 | 12 | 13 | import Flux 14 | 15 | 16 | """ 17 | logreg_sample(W, X) 18 | 19 | Sample from a logistic regression model with C classes and D features. 20 | 21 | Inputs: 22 | `W`: C*D matrix of weights. 23 | `X`: D*N matrix of feature vectors, one example per column. 24 | 25 | Outputs: 26 | `labels`: N-vector of integer labels from {1,...,C}. 27 | """ 28 | function logreg_sample(W::Array{Float64,2}, X::Array{Float64,2}) 29 | C, DW = size(W) 30 | DX, N = size(X) 31 | @assert DW == DX 32 | labels = Array{Int}(undef, N) 33 | # Binary case 34 | #y = Flux.logistic.(X * w) .< rand(N) 35 | for i=1:N 36 | probs = Flux.softmax(W * X[:,i]) 37 | dist = Distributions.Categorical(probs) 38 | labels[i] = rand(dist) 39 | end 40 | return labels 41 | end 42 | # @code_warntype logreg_sample(W,X) # LGTM 43 | 44 | 45 | function make_test_data() 46 | Random.seed!(1) 47 | C = 3; 48 | D = 4; 49 | N = 5; 50 | X = randn(D, N) # note that examples are stored in the columns 51 | W = randn(C, D) 52 | Y_cat = logreg_sample(W, X) 53 | Y_onehot = Flux.onehotbatch(Y_cat, 1:C) # C*N matrix of bools 54 | @test Y_cat[1] == findfirst(x -> x, Y_onehot[:,1]) 55 | return W, X, Y_onehot 56 | end 57 | 58 | """ 59 | logreg_nll_matrix(W, X, Y) 60 | 61 | Compute average negative log likelihood for a logistic regression model. 62 | 63 | Inputs: 64 | `W`: C*D matrix of weights. 65 | `X`: D*N matrix of feature vectors, one example per column. 66 | `Y`: C*N one hot matrix of labels. 
67 | 68 | Outputs: 69 | nll: -1/N * sum_{n=1}^N sum_{c=1}^C Y[c,n] * log(Pr[c,n]) 70 | where Pr[:,n] = softmax(W*X[:,n]) 71 | """ 72 | function logreg_nll_matrix(W::Array{Float64,2}, X::Array{Float64,2}, 73 | Y::Flux.OneHotMatrix{Array{Flux.OneHotVector,1}}) 74 | CW, DW = size(W) 75 | DX, NX = size(X) 76 | CY, NY = size(Y) 77 | @assert DW == DX 78 | @assert CY == CW 79 | @assert N == NY 80 | nll = 0.0 81 | for i=1:NX 82 | x = X[:,i] 83 | y = Y[:,i] 84 | probs = Flux.softmax(W*x) 85 | nll += -sum(y .* log.(probs)) # PMLv1 p255 86 | end 87 | return nll / N 88 | end 89 | 90 | 91 | 92 | """ 93 | logreg_nll(w, X, Y) 94 | 95 | Compute average negative log likelihood for a logistic regression model. 96 | 97 | Inputs: 98 | `w`: C*D 1D vector of weights, c changing fastest. 99 | `X`: D*N matrix of feature vectors, one example per column. 100 | `Y`: C*N one hot matrix of labels. 101 | 102 | Outputs: 103 | nll: -1/N * sum_{n=1}^N sum_{c=1}^C Y[c,n] * log(Pr[c,n]) 104 | where Pr[:,n] = softmax(W*X[:,n]) 105 | """ 106 | function logreg_nll(w::Array{Float64,1}, X::Array{Float64,2}, 107 | Y::Flux.OneHotMatrix{Array{Flux.OneHotVector,1}}) 108 | D, NX = size(X) 109 | C, NY = size(Y) 110 | @assert NX == NY 111 | W = reshape(w, (C, D)) 112 | return logreg_nll_matrix(W, X, Y) 113 | end 114 | 115 | 116 | function logreg_nll_test() 117 | W, X, Y = make_test_data() 118 | nll = logreg_nll(vec(W), X, Y) 119 | logits = W*X # CxD 120 | nll2 = Flux.logitcrossentropy(logits, Y) 121 | @test isapprox(nll, nll2) 122 | end 123 | 124 | function logreg_nll_grad(w, X, Y) 125 | D, NX = size(X) 126 | C, NY = size(Y) 127 | @assert NX == NY 128 | W = reshape(w, (C, D)) 129 | G = zeros(Float64, C, D) 130 | for i=1:NX 131 | x = X[:,i] 132 | y = Y[:,i] 133 | probs = Flux.softmax(W*x) 134 | delta = probs - y 135 | for c=1:C 136 | G[c,:] += delta[c] .* x 137 | end 138 | end 139 | return vec(G) 140 | end 141 | 142 | function logreg_nll_grad_test() 143 | W0, X, Y = make_test_data() 144 | w0 = vec(W0) 145 | g = logreg_nll_grad(w0, X, Y) 146 | g2 = Flux.gradient(w -> logreg_nll(w, X, Y), w0) 147 | @test isapprox(g, g2) 148 | end 149 | 150 | import Optim 151 | function logreg_fit_test() 152 | W, X, Y = make_test_data() 153 | w0 = vec(randn(size(W))) 154 | objective(w) = logreg_nll(w, X, Y) 155 | result = Optim.optimize(objective, w0, Optim.LBFGS()) 156 | West = reshape(Optim.minimizer(result), (C,D)) 157 | ncalls = result.iterations 158 | end 159 | 160 | 161 | #logreg_fit_test() 162 | #logreg_sample_test() 163 | #logreg_nll_test() 164 | #logreg_nll_grad_test() 165 | -------------------------------------------------------------------------------- /FactorGraphs/hmm.jl: -------------------------------------------------------------------------------- 1 | 2 | # Variational Bayes for a discrete observation HMM 3 | 4 | #https://github.com/biaslab/ForneyLab.jl/blob/master/demo/8_hidden_markov_model_estimation.ipynb 5 | 6 | using ForneyLab 7 | 8 | n_samples = 20 9 | A_data = [0.9 0.0 0.1; 0.1 0.9 0.0; 0.0 0.1 0.9] # Transition probabilities (some transitions are impossible) 10 | B_data = [0.9 0.05 0.05; 0.05 0.9 0.05; 0.05 0.05 0.9] # Observation noise 11 | 12 | s_0_data = [1.0, 0.0, 0.0] # Initial state 13 | 14 | # Generate some data 15 | Random.seed!(1) 16 | s_data = Vector{Vector{Float64}}(undef, n_samples) # one-hot encoding of the states 17 | x_data = Vector{Vector{Float64}}(undef, n_samples) # one-hot encoding of the observations 18 | s_t_min_data = s_0_data 19 | for t = 1:n_samples 20 | global s_t_min_data 21 | a = A_data*s_t_min_data 22 | 
s_data[t] = sample(ProbabilityDistribution(Categorical, p=a./sum(a))) # Simulate state transition 23 | b = B_data*s_data[t] 24 | x_data[t] = sample(ProbabilityDistribution(Categorical, p=b./sum(b))) # Simulate observation 25 | 26 | s_t_min_data = s_data[t] 27 | end 28 | ; 29 | 30 | g = FactorGraph() 31 | 32 | @RV A ~ Dirichlet(ones(3,3)) # Vague prior on transition model 33 | @RV B ~ Dirichlet([10.0 1.0 1.0; 1.0 10.0 1.0; 1.0 1.0 10.0]) # Stronger prior on observation model 34 | @RV s_0 ~ Categorical(1/3*ones(3)) 35 | 36 | s = Vector{Variable}(undef, n_samples) # one-hot coding 37 | x = Vector{Variable}(undef, n_samples) # one-hot coding 38 | s_t_min = s_0 39 | for t = 1:n_samples 40 | global s_t_min 41 | @RV s[t] ~ Transition(s_t_min, A) 42 | @RV x[t] ~ Transition(s[t], B) 43 | 44 | s_t_min = s[t] 45 | 46 | placeholder(x[t], :x, index=t, dims=(3,)) 47 | end; 48 | 49 | # Define the recognition factorization 50 | q = RecognitionFactorization(A, B, [s_0; s], ids=[:A, :B, :S]) 51 | 52 | # Generate VMP algorithm 53 | algo = variationalAlgorithm(q) 54 | 55 | # Construct variational free energy evaluation code 56 | algo_F = freeEnergyAlgorithm(q); 57 | 58 | # Load algorithms 59 | eval(Meta.parse(algo)) 60 | eval(Meta.parse(algo_F)) 61 | 62 | # Initial recognition distributions 63 | marginals = Dict{Symbol, ProbabilityDistribution}( 64 | :A => vague(Dirichlet, (3,3)), 65 | :B => vague(Dirichlet, (3,3))) 66 | 67 | # Initialize data 68 | data = Dict(:x => x_data) 69 | n_its = 20 70 | 71 | # Run algorithm 72 | F = Vector{Float64}(undef, n_its) 73 | for i = 1:n_its 74 | stepS!(data, marginals) 75 | stepB!(data, marginals) 76 | stepA!(data, marginals) 77 | 78 | F[i] = freeEnergy(data, marginals) 79 | end 80 | ; 81 | 82 | using PyPlot 83 | 84 | # Plot free energy 85 | clf() 86 | plot(1:n_its, F, color="black") 87 | grid("on") 88 | xlabel("Iteration") 89 | ylabel("Free Energy") 90 | xlim(0,n_its); 91 | gcf() 92 | 93 | figure(figsize=(10,5)) 94 | # Collect state estimates 95 | x_obs = [findfirst(x_i.==1.0) for x_i in x_data] 96 | s_true = [findfirst(s_i.==1.0) for s_i in s_data] 97 | 98 | # Plot simulated state trajectory and observations 99 | clf() 100 | subplot(211) 101 | plot(1:n_samples, x_obs, "k*", label="Observations x", markersize=7) 102 | plot(1:n_samples, s_true, "k--", label="True state s") 103 | yticks([1.0, 2.0, 3.0], ["Red", "Green", "Blue"]) 104 | grid("on") 105 | xlabel("Time") 106 | legend(loc="upper left") 107 | xlim(0,n_samples) 108 | ylim(0.9,3.1) 109 | title("Data set and true state trajectory") 110 | gcf() 111 | 112 | # Plot inferred state sequence 113 | subplot(212) 114 | m_s = [mean(marginals[:s_*t]) for t=1:n_samples] 115 | m_s_1 = [m_s_t[1] for m_s_t in m_s] 116 | m_s_2 = [m_s_t[2] for m_s_t in m_s] 117 | m_s_3 = [m_s_t[3] for m_s_t in m_s] 118 | 119 | fill_between(1:n_samples, zeros(n_samples), m_s_1, color="red") 120 | fill_between(1:n_samples, m_s_1, m_s_1 + m_s_2, color="green") 121 | fill_between(1:n_samples, m_s_1 + m_s_2, ones(n_samples), color="blue") 122 | xlabel("Time") 123 | ylabel("State belief") 124 | grid("on") 125 | title("Inferred state trajectory"); 126 | gcf() 127 | 128 | # True state transition probabilities 129 | clf() 130 | PyPlot.plt[:matshow](A_data, vmin=0.0, vmax=1.0) 131 | ttl = title("True state transition probabilities") 132 | ttl[:set_position]([.5, 1.15]) 133 | yticks([0, 1, 2], ["Red", "Green", "Blue"]) 134 | xticks([0, 1, 2], ["Red", "Green", "Blue"], rotation="vertical") 135 | colorbar() 136 | gcf() 137 | 138 | # Inferred state transition 
probabilities 139 | PyPlot.plt[:matshow](mean(marginals[:A]), vmin=0.0, vmax=1.0) 140 | ttl = title("Inferred state transition probabilities") 141 | ttl[:set_position]([.5, 1.15]) 142 | yticks([0, 1, 2], ["Red", "Green", "Blue"]) 143 | xticks([0, 1, 2], ["Red", "Green", "Blue"], rotation="vertical") 144 | colorbar(); 145 | gcf() 146 | -------------------------------------------------------------------------------- /Old/logreg-binary-opt-demo.jl: -------------------------------------------------------------------------------- 1 | # Examples of how to fit a binary logistic regression model using various methods. 2 | # We compute gradient analytically or using flux. 3 | # We use GLM package as a unit test of correctness. 4 | # https://stackoverflow.com/questions/49135107/logistic-regression-using-flux-jl 5 | 6 | 7 | using Flux 8 | 9 | function logreg_binary_predict(w, X) 10 | DX, N = size(X) 11 | DW = length(w) 12 | @assert DX == DW 13 | logits = [sum(w .* X[:,i]) for i in 1:N] 14 | probs = [Flux.sigmoid(l) for l in logits] 15 | #W = reshape(w, (1, D)) 16 | #probs = vec(Flux.sigmoid.(W * X)) # 1xN->N vector 17 | labels = [(p > 0.5 ? 1 : 0) for p in probs] 18 | return probs, labels, logits 19 | end 20 | 21 | function logreg_binary_sample(w, X) 22 | probs = logreg_binary_predict(w, X)[1] 23 | D, N = size(X) 24 | labels = probs .< rand(N) 25 | return labels 26 | end 27 | 28 | function make_test_data_binary(;N=20, D=2) 29 | Random.seed!(1) 30 | X = randn(D, N) # note that examples are stored in the columns 31 | w = randn(D) 32 | y = logreg_binary_sample(w, X) 33 | return w, X, y 34 | end 35 | 36 | function logreg_binary_nll(w, X, y) 37 | D, N = size(X) 38 | p = logreg_binary_predict(w, X)[1] 39 | return -sum(y .* log.(p) + (1 .- y) .* log.(1 .- p)) / N 40 | end 41 | 42 | function logreg_binary_nll_test() 43 | w, X, y = make_test_data_binary() 44 | D, N = size(X) 45 | nll1 = logreg_binary_nll(w, X, y) 46 | probs, labels, logits = logreg_binary_predict(w, X) 47 | nll2 = sum(Flux.logitbinarycrossentropy.(logits, y)) / N 48 | @test isapprox(nll1, nll2) 49 | end 50 | 51 | function logreg_binary_nll_grad(w, X, y) 52 | D, N = size(X) 53 | g = zeros(Float64,D) 54 | for i=1:N 55 | x = X[:,i] 56 | p = Flux.sigmoid(sum(w .* x)) 57 | g[:] += (p - y[i]) .* x 58 | end 59 | return g / N 60 | end 61 | 62 | function logreg_binary_nll_grad_test() 63 | w0, X, y = make_test_data_binary() 64 | g1 = logreg_binary_nll_grad(w0, X, y) 65 | g2 = Flux.gradient(w -> logreg_binary_nll(w, X, y), w0)[1] 66 | @test isapprox(g1, g2) 67 | end 68 | 69 | import Optim 70 | function logreg_binary_fit_bfgs(X, y; w0=nothing, max_iter=100) 71 | D, N = size(X) 72 | if isnothing(w0); w0 = randn(D); end 73 | objective(w) = logreg_binary_nll(w, X, y) 74 | grad(w) = logreg_binary_nll_grad(w, X, y) 75 | solver = Optim.LBFGS(;linesearch = Optim.BackTracking()) 76 | opts = Optim.Options(iterations = max_iter) 77 | result = Optim.optimize(objective, grad, w0, solver, opts; inplace=false) 78 | w_est = result.minimizer 79 | return w_est 80 | end 81 | 82 | include("first-order.jl") 83 | function logreg_binary_fit_adam(X, y; w0=nothing, max_iter=100) 84 | D, N = size(X) 85 | if isnothing(w0); w0 = randn(D); end 86 | objective(w) = logreg_binary_nll(w, X, y) 87 | grad(w) = logreg_binary_nll_grad(w, X, y) 88 | solver = Adam(0.2) 89 | w_est = run_descent_method(solver, objective, grad, w0; max_iter=max_iter) 90 | return w_est 91 | end 92 | 93 | function logreg_binary_fit_flux(X, y; w0=nothing, max_iter=100) 94 | D, N = size(X) 95 | #initW(out, in) = 
reshape(w0, (1,D)) 96 | #https://github.com/FluxML/Flux.jl/issues/332 97 | #model = Chain(Dense(D, 1; initW = initW), sigmoid) 98 | #model = Chain(Dense(D, 1), sigmoid) 99 | model = Chain(Dense(D, 2), softmax) 100 | #loss(x, y) = Flux.binarycrossentropy(model(x), y) 101 | loss(x, y) = crossentropy(model(x), y) 102 | Y = onehotbatch(y, 0:1) 103 | data = (X,Y) 104 | callback() = @show(loss(X, Y)) 105 | opt = Flux.Optimise.ADAM() 106 | for epoch=1:max_iter 107 | Flux.train!(loss, params(model), data, opt, cb = callback) 108 | end 109 | end 110 | 111 | #= 112 | D = 2; N = 20; 113 | X = randn(D, N) 114 | y = rand([0,1], N) 115 | D, N = size(X) 116 | data = repeated((X,Y),1) 117 | 118 | #model = Chain(Dense(D, 1), sigmoid) # DimensionMismatch("multiplicative identity defined only for square matrices") 119 | #model = Dense(D, 1, sigmoid) #LoadError: no method matching eps(::TrackedArray{…,Array{Float32,2}}) 120 | #loss(x, y) = Flux.binarycrossentropy(model(x), y) 121 | #Y = y 122 | model = Chain(Dense(D, 2), softmax) 123 | #model = Dense(D, 2, softmax) # MethodError: no method matching similar(::Float32) 124 | loss(x, y) = crossentropy(model(x), y) 125 | Y = onehotbatch(y, 0:1) 126 | 127 | callback() = @show(loss(X, Y)) 128 | opt = Flux.Optimise.ADAM() 129 | Flux.train!(loss, params(model), data, opt, cb = callback) 130 | =# 131 | 132 | #= 133 | #https://github.com/FluxML/model-zoo/blob/master/vision/mnist/mlp.jl 134 | 135 | imgs = MNIST.images() #60k-vector, imgs[1] is 28x28 matrix of Grayscale 136 | # Stack images into one large batch 137 | XX = hcat(float.(reshape.(imgs, :))...) # 784x60k of float 138 | 139 | labels = MNIST.labels() 140 | Y = onehotbatch(labels, 0:9) # 10k60k OneHotMatrix 141 | =# 142 | 143 | 144 | using GLM, DataFrames 145 | #https://discourse.julialang.org/t/how-to-fit-a-glm-to-all-unnamed-features-of-arbitrary-design-matrix/20490/3 146 | function logreg_binary_fit_glm(X, y) 147 | D, N = size(X) 148 | XT = permutedims(X) 149 | model = fit(GeneralizedLinearModel, XT, y, Bernoulli()) 150 | #df_y = DataFrame(y[:, :], [:yname]) 151 | #df_x_names = [Symbol("x$i") for i in 1:size(Xt)[2]] 152 | #df_x = DataFrame(Xt, df_x_names) 153 | #df = hcat(df_y, df_x) 154 | #lhs = :yname 155 | # include all variables :x1 to :xD but exclude intercept (hence -1) 156 | #rhs = Expr(:call, :+, -1, df_x_names...) 
157 | #model = glm(@eval(@formula($(lhs) ~ $(rhs))), df, Bernoulli(), LogitLink()) 158 | return coef(model) 159 | end 160 | 161 | function logreg_binary_fit_test() 162 | wtrue, X, y = make_test_data_binary(;D=2) 163 | D, N = size(X) 164 | Random.seed!(1) 165 | winit = randn(D) 166 | wopt = logreg_binary_fit_glm(X, y) 167 | ll_opt = logreg_binary_nll(wopt, X, y) 168 | println("glm: ll $ll_opt, w $wopt") 169 | expts = Tuple{Any, String}[] 170 | push!(expts, (logreg_binary_fit_bfgs, "bfgs")) 171 | push!(expts, (logreg_binary_fit_adam, "adam")) 172 | push!(expts, (logreg_binary_fit_flux, "flux")) 173 | for (solver, method_name) in expts 174 | west = solver(X, y, w0=winit, max_iter=100) 175 | ll = logreg_binary_nll(west, X, y) 176 | println("$method_name, ll $ll, w $west") 177 | #@test isapprox(west, wopt; atol=0.01) 178 | #@test isapprox(ll, ll_opt; atol=0.1) 179 | end 180 | end 181 | 182 | logreg_binary_nll_test() 183 | logreg_binary_nll_grad_test() 184 | logreg_binary_fit_test() 185 | -------------------------------------------------------------------------------- /Old/logreg-binary-opt-old.jl: -------------------------------------------------------------------------------- 1 | # Examples of how to fit a binary logistic regression model using various methods. 2 | # We compute gradient analytically or using flux. 3 | # We use GLM package as a unit test of correctness. 4 | # https://stackoverflow.com/questions/49135107/logistic-regression-using-flux-jl 5 | 6 | 7 | using Flux 8 | 9 | function logreg_binary_predict(w, X) 10 | DX, N = size(X) 11 | DW = length(w) 12 | @assert DX == DW 13 | logits = [sum(w .* X[:,i]) for i in 1:N] 14 | probs = [Flux.sigmoid(l) for l in logits] 15 | #W = reshape(w, (1, D)) 16 | #probs = vec(Flux.sigmoid.(W * X)) # 1xN->N vector 17 | labels = [(p > 0.5 ? 
1 : 0) for p in probs] 18 | return probs, labels, logits 19 | end 20 | 21 | function logreg_binary_sample(w, X) 22 | probs = logreg_binary_predict(w, X)[1] 23 | D, N = size(X) 24 | labels = probs .< rand(N) 25 | return labels 26 | end 27 | 28 | function make_test_data_binary(;N=20, D=2) 29 | Random.seed!(1) 30 | X = randn(D, N) # note that examples are stored in the columns 31 | w = randn(D) 32 | y = logreg_binary_sample(w, X) 33 | return w, X, y 34 | end 35 | 36 | function logreg_binary_nll(w, X, y) 37 | D, N = size(X) 38 | p = logreg_binary_predict(w, X)[1] 39 | return -sum(y .* log.(p) + (1 .- y) .* log.(1 .- p)) / N 40 | end 41 | 42 | function logreg_binary_nll_test() 43 | w, X, y = make_test_data_binary() 44 | D, N = size(X) 45 | nll1 = logreg_binary_nll(w, X, y) 46 | probs, labels, logits = logreg_binary_predict(w, X) 47 | nll2 = sum(Flux.logitbinarycrossentropy.(logits, y)) / N 48 | @test isapprox(nll1, nll2) 49 | end 50 | 51 | function logreg_binary_nll_grad(w, X, y) 52 | D, N = size(X) 53 | g = zeros(Float64,D) 54 | for i=1:N 55 | x = X[:,i] 56 | p = Flux.sigmoid(sum(w .* x)) 57 | g[:] += (p - y[i]) .* x 58 | end 59 | return g / N 60 | end 61 | 62 | function logreg_binary_nll_grad_test() 63 | w0, X, y = make_test_data_binary() 64 | g1 = logreg_binary_nll_grad(w0, X, y) 65 | g2 = Flux.gradient(w -> logreg_binary_nll(w, X, y), w0)[1] 66 | @test isapprox(g1, g2) 67 | end 68 | 69 | import Optim 70 | function logreg_binary_fit_bfgs(X, y; w0=nothing, max_iter=100) 71 | D, N = size(X) 72 | if isnothing(w0); w0 = randn(D); end 73 | objective(w) = logreg_binary_nll(w, X, y) 74 | grad(w) = logreg_binary_nll_grad(w, X, y) 75 | solver = Optim.LBFGS(;linesearch = Optim.BackTracking()) 76 | opts = Optim.Options(iterations = max_iter) 77 | result = Optim.optimize(objective, grad, w0, solver, opts; inplace=false) 78 | w_est = result.minimizer 79 | return w_est 80 | end 81 | 82 | include("first-order.jl") 83 | function logreg_binary_fit_adam(X, y; w0=nothing, max_iter=100) 84 | D, N = size(X) 85 | if isnothing(w0); w0 = randn(D); end 86 | objective(w) = logreg_binary_nll(w, X, y) 87 | grad(w) = logreg_binary_nll_grad(w, X, y) 88 | solver = Adam(0.2) 89 | w_est = run_descent_method(solver, objective, grad, w0; max_iter=max_iter) 90 | return w_est 91 | end 92 | 93 | function logreg_binary_fit_flux(X, y; w0=nothing, max_iter=100) 94 | D, N = size(X) 95 | #initW(out, in) = reshape(w0, (1,D)) 96 | #https://github.com/FluxML/Flux.jl/issues/332 97 | #model = Chain(Dense(D, 1; initW = initW), sigmoid) 98 | #model = Chain(Dense(D, 1), sigmoid) 99 | model = Chain(Dense(D, 2), softmax) 100 | #loss(x, y) = Flux.binarycrossentropy(model(x), y) 101 | loss(x, y) = crossentropy(model(x), y) 102 | Y = onehotbatch(y, 0:1) 103 | data = (X,Y) 104 | callback() = @show(loss(X, Y)) 105 | opt = Flux.Optimise.ADAM() 106 | for epoch=1:max_iter 107 | Flux.train!(loss, params(model), data, opt, cb = callback) 108 | end 109 | end 110 | 111 | #= 112 | D = 2; N = 20; 113 | X = randn(D, N) 114 | y = rand([0,1], N) 115 | D, N = size(X) 116 | data = repeated((X,Y),1) 117 | 118 | #model = Chain(Dense(D, 1), sigmoid) # DimensionMismatch("multiplicative identity defined only for square matrices") 119 | #model = Dense(D, 1, sigmoid) #LoadError: no method matching eps(::TrackedArray{…,Array{Float32,2}}) 120 | #loss(x, y) = Flux.binarycrossentropy(model(x), y) 121 | #Y = y 122 | model = Chain(Dense(D, 2), softmax) 123 | #model = Dense(D, 2, softmax) # MethodError: no method matching similar(::Float32) 124 | loss(x, y) = 
crossentropy(model(x), y) 125 | Y = onehotbatch(y, 0:1) 126 | 127 | callback() = @show(loss(X, Y)) 128 | opt = Flux.Optimise.ADAM() 129 | Flux.train!(loss, params(model), data, opt, cb = callback) 130 | =# 131 | 132 | #= 133 | #https://github.com/FluxML/model-zoo/blob/master/vision/mnist/mlp.jl 134 | 135 | imgs = MNIST.images() #60k-vector, imgs[1] is 28x28 matrix of Grayscale 136 | # Stack images into one large batch 137 | XX = hcat(float.(reshape.(imgs, :))...) # 784x60k of float 138 | 139 | labels = MNIST.labels() 140 | Y = onehotbatch(labels, 0:9) # 10k60k OneHotMatrix 141 | =# 142 | 143 | 144 | using GLM, DataFrames 145 | #https://discourse.julialang.org/t/how-to-fit-a-glm-to-all-unnamed-features-of-arbitrary-design-matrix/20490/3 146 | function logreg_binary_fit_glm(X, y) 147 | D, N = size(X) 148 | XT = permutedims(X) 149 | model = fit(GeneralizedLinearModel, XT, y, Bernoulli()) 150 | #df_y = DataFrame(y[:, :], [:yname]) 151 | #df_x_names = [Symbol("x$i") for i in 1:size(Xt)[2]] 152 | #df_x = DataFrame(Xt, df_x_names) 153 | #df = hcat(df_y, df_x) 154 | #lhs = :yname 155 | # include all variables :x1 to :xD but exclude intercept (hence -1) 156 | #rhs = Expr(:call, :+, -1, df_x_names...) 157 | #model = glm(@eval(@formula($(lhs) ~ $(rhs))), df, Bernoulli(), LogitLink()) 158 | return coef(model) 159 | end 160 | 161 | function logreg_binary_fit_test() 162 | wtrue, X, y = make_test_data_binary(;D=2) 163 | D, N = size(X) 164 | Random.seed!(1) 165 | winit = randn(D) 166 | wopt = logreg_binary_fit_glm(X, y) 167 | ll_opt = logreg_binary_nll(wopt, X, y) 168 | println("glm: ll $ll_opt, w $wopt") 169 | expts = Tuple{Any, String}[] 170 | push!(expts, (logreg_binary_fit_bfgs, "bfgs")) 171 | push!(expts, (logreg_binary_fit_adam, "adam")) 172 | push!(expts, (logreg_binary_fit_flux, "flux")) 173 | for (solver, method_name) in expts 174 | west = solver(X, y, w0=winit, max_iter=100) 175 | ll = logreg_binary_nll(west, X, y) 176 | println("$method_name, ll $ll, w $west") 177 | #@test isapprox(west, wopt; atol=0.01) 178 | #@test isapprox(ll, ll_opt; atol=0.1) 179 | end 180 | end 181 | 182 | logreg_binary_nll_test() 183 | logreg_binary_nll_grad_test() 184 | logreg_binary_fit_test() 185 | -------------------------------------------------------------------------------- /FactorGraphs/kalman_smoother_2d.jl: -------------------------------------------------------------------------------- 1 | 2 | # RTS smoothing for a linear Gaussian state space model. 3 | # based on https://github.com/probml/pmtk3/blob/master/demos/kalmanTrackingDemo.m 4 | # We consider the example from "Machine learning: a probabilistic perspective" 5 | # (Murphy, 2012) fig 18.1. 
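# Generative model (set up in make_params below): a 2d tracking problem with
# 4 hidden states (2d position and velocity) and 2 observed states (noisy position),
#   z_t = F z_{t-1} + N(0, Q),   y_t = H z_t + N(0, R),
# with prior N(mu0, V0) on the initial state. Running the sum-product algorithm
# on the resulting factor graph yields the smoothed marginals p(z_t | y_{1:T}),
# whose means and covariance ellipses are plotted at the end of this script.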
6 | 7 | using ForneyLab, LinearAlgebra, Test 8 | import Distributions # "Multivariate" clashes with ForneyLab 9 | #using PDMats 10 | 11 | function make_diagonal(v) 12 | d = length(v) 13 | A = zeros(d,d) 14 | A[diagind(A)] = v 15 | return A 16 | end 17 | 18 | function make_params() 19 | F = [1 0 1 0; 0 1 0 1; 0 0 1 0; 0 0 0 1.0]; 20 | H = [1.0 0 0 0; 0 1 0 0]; 21 | nobs, nhidden = size(H) 22 | #Q = PDiagMat(fill(0.001, nhidden)) 23 | #Q = make_diagonal(fill(0.001, nhidden)) 24 | e = 0.001; Q = [e 0 0 0; 0 e 0 0; 0 0 e 0; 0 0 0 e] 25 | #R = PDiagMat(fill(1.0, nobs)) 26 | #R = make_diagonal(fill(1.0, nobs)) 27 | e = 1.0; R = [e 0; 0 e] 28 | mu0 = [8, 10, 1, 0.0]; 29 | #V0 = PDiagMat(fill(1, nhidden)) 30 | #V0 = make_diagonal(fill(1, nhidden)) 31 | e = 1.0; V0 = [e 0 0 0; 0 e 0 0; 0 0 e 0; 0 0 0 e] 32 | params = (mu0 = mu0, V0 = V0, F = F, H = H, Q = Q, R = R) 33 | return params 34 | end 35 | 36 | function lin_gauss_ssm_sample(T, params) 37 | # z(t+1) = F * z(t-1) + N(0,Q) so F is D*D 38 | # y(t) = H * z(t) + N(0, R) so H is O*D 39 | # Returns zs: H*T, ys: O*T 40 | F = params.F; H = params.H; Q = params.Q; R = params.R; mu0 = params.mu0; V0 = params.V0; 41 | nobs, nhidden = size(H) 42 | zs = Array{Float64}(undef, nhidden, T) 43 | ys = Array{Float64}(undef, nobs, T) 44 | prior_z = Distributions.MvNormal(mu0, V0) 45 | process_noise_dist = Distributions.MvNormal(Q) 46 | obs_noise_dist = Distributions.MvNormal(R) 47 | zs[:,1] = rand(prior_z) 48 | ys[:,1] = H*zs[:,1] + rand(obs_noise_dist) 49 | for t=2:T 50 | zs[:,t] = F*zs[:,t-1] + rand(process_noise_dist) 51 | ys[:,t] = H*zs[:,t] + rand(obs_noise_dist) 52 | end 53 | return zs, ys 54 | end 55 | 56 | function convert_2d_matrix_to_vec_of_vec(M::Array{Float64,2}) 57 | N, T = size(M) 58 | v = Vector{Vector{Float64}}(undef, T) 59 | for i=1:T 60 | v[i] = M[:,i] 61 | end 62 | return v 63 | end 64 | 65 | function make_data(T, params) 66 | nobs, nhidden = size(params.H) 67 | (zs, ys) = lin_gauss_ssm_sample(T, params) 68 | #zs = randn(nhidden, T) 69 | #ys = randn(nobs, T) 70 | # ForneyLab cannot handle matrix observations 71 | # https://github.com/biaslab/ForneyLab.jl/issues/17 72 | y_data = convert_2d_matrix_to_vec_of_vec(ys) 73 | end 74 | 75 | 76 | function make_factor_graph(T, params) 77 | F = params.F; H = params.H; Q = params.Q; R = params.R; mu0 = params.mu0; V0 = params.V0; 78 | g = FactorGraph() 79 | @RV x0 ~ GaussianMeanVariance(mu0, V0) 80 | x = Vector{Variable}(undef, T) 81 | y = Vector{Variable}(undef, T) 82 | x_t_prev = x0 83 | for t = 1:T 84 | #global x_t_prev 85 | mu_x = F * x_t_prev 86 | @RV [id=:x_*t] x[t] ~ GaussianMeanVariance(mu_x, Q) 87 | #@RV x[t] ~ GaussianMeanVariance(mu_x, Q) 88 | #x[t].id = Symbol("x_", t) #:x_t; 89 | mu_y = H * x[t] 90 | @RV y[t] ~ GaussianMeanVariance(mu_y, R) 91 | nobs, nhidden = size(H) 92 | placeholder(y[t], :y, dims=(nobs,), index=t) 93 | x_t_prev = x[t] 94 | end 95 | fg = (x0 = x0, x = x, y = y) 96 | return fg 97 | end 98 | 99 | 100 | function make_fg_inference(fg) 101 | println("generating code") 102 | algo = Meta.parse(sumProductAlgorithm(fg.x)) 103 | eval(algo) # Compile the step! 
function 104 | function infer(y_data) 105 | T = length(y_data) 106 | D = length(y_data[1]) 107 | id = Symbol("y") 108 | data = Dict(id => y_data) 109 | #data = Dict(fg.y => y_data) 110 | println("running forwards backwards") 111 | marginals = step!(data) 112 | println("computing marginals") 113 | m_x = [mean(marginals[:x_*t]) for t = 1:T] # T of D arrays 114 | V_x = [cov(marginals[:x_*t]) for t = 1:T] # T of DxD arrays 115 | return m_x, V_x 116 | end 117 | return infer 118 | end 119 | 120 | # Source: 121 | # https://github.com/QuantEcon/QuantEcon.lectures.code/blob/master/kalman/gaussian_contours.jl 122 | function bivariate_normal(X::Matrix, Y::Matrix, σ_x::Real=1.0, σ_y::Real=1.0, 123 | μ_x::Real=0.0, μ_y::Real=0.0, σ_xy::Real=0.0) 124 | Xμ = X .- μ_x 125 | Yμ = Y .- μ_y 126 | ρ = σ_xy/(σ_x*σ_y) 127 | z = Xμ.^2/σ_x^2 + Yμ.^2/σ_y^2 - 2*ρ.*Xμ.*Yμ/(σ_x*σ_y) 128 | denom = 2π*σ_x*σ_y*sqrt(1-ρ^2) 129 | return exp.(-z/(2*(1-ρ^2))) ./ denom 130 | end 131 | 132 | 133 | # https://github.com/probml/pmtk3/blob/master/matlabTools/graphics/gaussPlot2d.m 134 | function plot_gauss2d(m, C) 135 | U = eigvecs(C) 136 | D = eigvals(C) 137 | N = 100 138 | t = range(0, stop=2*pi, length=N) 139 | xy = zeros(Float64, 2, N) 140 | xy[1,:] = cos.(t) 141 | xy[2,:] = sin.(t) 142 | k = sqrt(6) # approx sqrt(chi2inv(0.95, 2)) = 2.45 143 | k = 0.5 # random hack to make ellipses smaller 144 | w = (k * U * Diagonal(sqrt.(D))) * xy # 2*N 145 | Plots.scatter!([m[1]], [m[2]], marker=:star) 146 | handle = plot!(w[1,:] .+ m[1], w[2,:] .+ m[2]) 147 | return handle 148 | end 149 | 150 | function plot_gauss2d_test() 151 | m = [0.0, 0.0] 152 | #C = randn(2,2); C = C*C'; 153 | C = [1.0 0.0; 0.0 3.0]; 154 | @test isposdef(C) 155 | Plots.scatter([m[1]], [m[2]], marker=:star) 156 | plot_2dgauss(m, C) 157 | end 158 | 159 | 160 | function compute_gauss_marginals(m_x, V_x, keep) 161 | mm_x = [m_x[t][keep] for t=1:T] 162 | VV_x = [V_x[t][keep, keep] for t=1:T] 163 | return mm_x, VV_x 164 | end 165 | 166 | 167 | Random.seed!(1) 168 | T = 5 169 | model = make_params() # cannot use "params" because of "Flux.params" 170 | y_data = make_data(T, model) # T vector of D vectors 171 | ys = hcat(y_data...) # D*T matrix 172 | fg = make_factor_graph(T, model) 173 | infer = make_fg_inference(fg) 174 | mz, Vz = infer(y_data) 175 | mz2, Vz2 = compute_gauss_marginals(mz, Vz, 1:2) 176 | 177 | using Plots; pyplot() 178 | closeall() 179 | plt = scatter(ys[1,:], ys[2,:], label="obs", reuse=false) 180 | xlims!(minimum(ys[1,:])-1, maximum(ys[1,:])+1) 181 | ylims!(minimum(ys[2,:])-1, maximum(ys[2,:])+1) 182 | display(plt) 183 | for t=1:T 184 | plt = plot_gauss2d(mz2[t], Vz2[t]) 185 | #plt = annotate!(mz2[t][1], mz2[t][2], "t=$t"); 186 | display(plt) 187 | end 188 | -------------------------------------------------------------------------------- /Old/rosenbrock-opt-demo.jl: -------------------------------------------------------------------------------- 1 | # Apply various optimization algorithsm to 2d objectives. 2 | 3 | # For the rosenbrock function. 
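# In the form used here (rosenbrock and rosenbrock_grad are assumed to be
# defined in utils.jl, which is included below):
#   f(x)  = (a - x[1])^2 + b*(x[2] - x[1]^2)^2
#   ∇f(x) = [-2*(a - x[1]) - 4*b*x[1]*(x[2] - x[1]^2),  2*b*(x[2] - x[1]^2)]
# The single global minimum is f = 0 at x = [a, a^2] (i.e. [1, 1] for a = 1),
# which is the point labelled "opt" in the contour plots below.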
4 | # See this webpage for a cool interactive demo of various methods 5 | #https://bl.ocks.org/EmilienDupont/f97a3902f4f3a98f350500a3a00371db 6 | 7 | # For the quadratic, we follow the parameters from 8 | #http://people.duke.edu/~ccc14/sta-663-2018/notebooks/S09G_Gradient_Descent_Optimization.html 9 | 10 | 11 | #import Flux 12 | import Optim 13 | #using Test 14 | using LinearAlgebra 15 | 16 | include("utils.jl") 17 | include("first-order.jl") 18 | include("second-order.jl") 19 | 20 | 21 | ##################### 22 | 23 | f(x) = rosenbrock(x, a=1, b=5) 24 | ∇f(x) = rosenbrock_grad(x, a=1, b=5) 25 | ftuple = (x,y) -> f([x,y]) 26 | xdomain = range(-2, stop=2, length=100) 27 | ydomain = range(-2, stop=2, length=100) 28 | x0 = [-1.0, -1.0] 29 | levels=[1,2,3,5,10,20,50,100] 30 | fn_name = "rosen-a1b5" 31 | 32 | #= 33 | # To generate figs 5-5 to 5-7 of A4O we use this pseudo-rosenbrock function 34 | # (note the 4x[2]) 35 | f = x -> (1-x[1])^2 + 100*(4x[2] - x[1]^2)^2 36 | ∇f = x -> [2*(200x[1]^3 - 800x[1]*x[2] + x[1] - 1), -800*(x[1]^2 - 4x[2])] 37 | #f = rosenbrock 38 | #∇f = rosenbrock_grad 39 | xdomain = range(-3, stop=2, length=100) 40 | ydomain = range(-0.5, stop=2, length=100) 41 | x0 = [-2,1.5] 42 | levels=[2,10,50,200,500] 43 | fn_name = "rosen2-a1b100" 44 | =# 45 | 46 | #= 47 | f(x) = rosenbrock(x, a=1, b=100) 48 | ∇f(x) = rosenbrock_grad(x, a=1, b=100) 49 | xdomain = range(-4, stop=4, length=200) 50 | ydomain = range(-4, stop=4, length=200) 51 | x0 = [4.0, -4.0] 52 | levels=range(1,stop=10,length=10) .^ 5 53 | fn_name = "rosen-a1b100" 54 | =# 55 | 56 | #= 57 | # Ill-conditioned quadratic 58 | f(x) = x[1]^2 + 100*x[2]^2 59 | ∇f(x) = [2*x[1], 200*x[2]] 60 | xdomain = range(-1, stop=1, length=100) 61 | ydomain = xdomain 62 | x0 = [-1.0, -1.0] 63 | levels = [0.1,1,2,4,9, 16, 25, 36, 49, 64, 81, 100] 64 | fn_name = "quad" 65 | =# 66 | 67 | ######################### 68 | 69 | N = 50 70 | expts = Tuple{DescentMethod, String}[] 71 | 72 | #values chosen for rosenbrock(a=1,b=5) 73 | #push!(expts, (GradientDescentLineSearch(), "GD-linesearch")) 74 | #push!(expts, (GradientDescent(0.01), "GD-0.01")) 75 | #push!(expts, (GradientDescent(0.05), "GD-0.05")) 76 | push!(expts, (Momentum(0.005), "momentum")) 77 | push!(expts, (NesterovMomentum(0.005), "nesterov")) 78 | push!(expts, (Adagrad(0.2), "Adagrad")) 79 | push!(expts, (RMSProp(0.2), "RMSProp")) 80 | push!(expts, (Adadelta(), "AdaDelta")) 81 | push!(expts, (Adam(0.2), "Adam")) 82 | push!(expts, (HyperGradientDescent(0.05, 2e-5), "hyper-GD")) 83 | push!(expts, (BFGS(), "BFGS")) 84 | push!(expts, (LBFGS(5), "LBFGS")) 85 | push!(expts, (DFP(), "DFP")) 86 | 87 | 88 | #= 89 | # values chosen for quadratic 90 | push!(expts, (GradientDescent(1e-3), "gradient descent (1e-3)")) 91 | push!(expts, (GradientDescent(1e-2), "gradient descent (1e-2)")) 92 | push!(expts, (GradientDescentLineSearch(), "gradient descent + linesearch")) 93 | push!(expts, (Momentum(1e-2), "momentum")) 94 | #push!(expts, (NesterovMomentum(0.0002, 0.92, zeros(2)), "Nesterov momentum")) 95 | #push!(expts, (HyperGradientDescent(0.0004, 8e-13, NaN, zeros(2)), "hypermomentum")) 96 | #push!(expts, (HyperNesterovMomentum(0.00023, 1e-12, 0.93, zeros(2), NaN, zeros(2)), "hyper-Nesterov")) 97 | =# 98 | 99 | 100 | #= 101 | # values chosen for rosenbrock(a=1, b=100) 102 | push!(expts, (GradientDescent(1e-4), "gradient descent (0.0001)")) 103 | push!(expts, (GradientDescent(3e-4), "gradient descent (0.0003)")) 104 | push!(expts, (GradientDescentLineSearch(), "gradient descent + linesearch")) 105 | 
push!(expts, (Momentum(1e-5), "momentum")) 106 | #push!(expts, (NesterovMomentum(0.0002, 0.92, zeros(2)), "Nesterov momentum")) 107 | #push!(expts, (HyperGradientDescent(0.0004, 8e-13, NaN, zeros(2)), "hypermomentum")) 108 | #push!(expts, (HyperNesterovMomentum(0.00023, 1e-12, 0.93, zeros(2), NaN, zeros(2)), "hyper-Nesterov")) 109 | =# 110 | 111 | #= 112 | # values chosen for pseudo-rosenbrock 113 | push!(expts, (GradientDescent(0.0003), "gradient descent (0.0003)")) 114 | push!(expts, (GradientDescent(0.003), "gradient descent (0.003)")) 115 | push!(expts, (GradientDescentLineSearch(), "gradient descent + linesearch")) 116 | #push!(expts, (Momentum(0.0003, 0.9, zeros(2)), "momentum")) 117 | #push!(expts, (NesterovMomentum(0.0002, 0.92, zeros(2)), "Nesterov momentum")) 118 | #push!(expts, (HyperGradientDescent(0.0004, 8e-13, NaN, zeros(2)), "hypermomentum")) 119 | #push!(expts, (HyperNesterovMomentum(0.00023, 1e-12, 0.93, zeros(2), NaN, zeros(2)), "hyper-Nesterov")) 120 | =# 121 | 122 | using Plots 123 | pyplot() 124 | for (M, method_name) in expts 125 | println("using $method_name") 126 | plt = contour(xdomain, ydomain, (x,y)->f([x,y]), levels=levels, reuse=false) 127 | scatter!([x0[1]], [x0[2]], marker=:circle, markercolor=:black, 128 | markersize=5, label="start") 129 | scatter!([1.0], [1.0], marker=:star, markercolor=:red, 130 | markersize=5, label="opt") 131 | 132 | pts = [x0] 133 | ftrace = [f(x0)] 134 | function cb(M, fn, grad, x, iter) 135 | y = fn(x) 136 | g = grad(x) 137 | gnorm = norm(g) 138 | #println("$name, iter=$iter, f=$y, gnorm=$gnorm") 139 | if abs(y)>1e10 || abs(gnorm)>1e10 || abs(gnorm) < 1e-2; return true; end 140 | push!(pts, x) 141 | push!(ftrace, y) 142 | return false 143 | end 144 | xfinal = run_descent_method(M, f, ∇f, x0; max_iter=N, callback=cb) 145 | xtrace = hcat(pts...) # 2xN 146 | #println(ftrace) 147 | plot!(xtrace[1,:], xtrace[2,:], label=method_name, linewidth=3, linecolor=:red) 148 | display(plt) 149 | savefig(plt, "Figures/$fn_name-$method_name-x.pdf") 150 | plt = plot(ftrace, label=method_name; yaxis=:log) 151 | display(plt) 152 | savefig(plt, "Figures/$fn_name-$method_name-f.pdf") 153 | end 154 | 155 | 156 | #Optiml.jl has a different interface 157 | # See https://julianlsolvers.github.io/Optim.jl/stable/#user/minimization/ 158 | function apply_LBFGS(f, ∇f, x0, N) 159 | solver = Optim.LBFGS(;linesearch = Optim.BackTracking()) 160 | opts = Optim.Options(iterations = N, store_trace=true, extended_trace=true) 161 | result = Optim.optimize(rosenbrock, rosenbrock_grad, x0, solver, opts; inplace=false) 162 | xopt = result.minimizer 163 | xtrace = Optim.x_trace(result) # N-vector of D-vectors 164 | N = length(xtrace) 165 | D = length(xtrace[1]) 166 | xs = hcat(xtrace...) 
#DxN 167 | ftrace = Optim.f_trace(result) 168 | return xopt, xs, ftrace 169 | end 170 | 171 | xopt, xtrace, ftrace = apply_LBFGS(f, ∇f, x0, N) 172 | method_name = "LBFGS-Optim" 173 | plt = contour(xdomain, ydomain, (x,y)->f([x,y]), levels=levels, reuse=false) 174 | scatter!([x0[1]], [x0[2]], marker=:circle, markercolor=:black, 175 | markersize=5, label="start") 176 | scatter!([1.0], [1.0], marker=:star, markercolor=:red, 177 | markersize=5, label="opt") 178 | plot!(xtrace[1,:], xtrace[2,:], label=method_name, linewidth=3, linecolor=:red) 179 | display(plt) 180 | savefig(plt, "Figures/$fn_name-$method_name-x.pdf") 181 | plt = plot(ftrace, label=method_name; yaxis=:log) 182 | display(plt) 183 | savefig(plt, "Figures/$fn_name-$method_name-f.pdf") 184 | -------------------------------------------------------------------------------- /Old/dummy.jl: -------------------------------------------------------------------------------- 1 | # scratch file 2 | 3 | # Latent dimensionality, # hidden units. 4 | Dz, Dh = 5, 500 5 | 6 | # Components of recognition model / "encoder" MLP. 7 | A, μ, logσ = Dense(28^2, Dh, tanh), Dense(Dh, Dz), Dense(Dh, Dz) 8 | g(X) = (h = A(X); (μ(h), logσ(h))) 9 | z(μ, logσ) = μ + exp(logσ) * randn(Float32) 10 | 11 | # Generative model / "decoder" MLP. 12 | f = Chain(Dense(Dz, Dh, tanh), Dense(Dh, 28^2, σ)) 13 | 14 | 15 | 16 | using Distributed 17 | # parallel loops: 18 | 19 | Sys.CPU_THREADS 20 | 21 | # parallel maps: 22 | using LinearAlgebra 23 | function rank_marray() 24 | marr = [rand(1000,1000) for i=1:10] 25 | println(map(rank, marr)) 26 | end 27 | @benchmark rank_marray() 28 | 29 | # memory estimate: 158.40 MiB 30 | # allocs estimate: 202 31 | # -------------- 32 | # minimum time: 5.659 s (3.19% GC) 33 | # median time: 5.659 s (3.19% GC) 34 | # mean time: 5.659 s (3.19% GC) 35 | # maximum time: 5.659 s (3.19% GC) 36 | # -------------- 37 | # samples: 1 38 | # evals/sample: 1 39 | # 40 | function prank_marray() 41 | marr = [rand(1000,1000) for i=1:10] 42 | println(pmap(rank, marr)) 43 | end 44 | @benchmark prank_marray() 45 | # BenchmarkTools.Trial: 46 | # memory estimate: 76.34 MiB 47 | # allocs estimate: 1128 48 | # -------------- 49 | # minimum time: 3.517 s (0.82% GC) 50 | # median time: 3.643 s (2.44% GC) 51 | # mean time: 3.643 s (2.44% GC) 52 | # maximum time: 3.769 s (3.95% GC) 53 | # -------------- 54 | # samples: 2 55 | # evals/sample: 1 56 | # 57 | 58 | using BenchmarkTools 59 | 60 | fmap(x) = map(x -> 2x, x) 61 | fcomprehension(x) = [2x for x in x] 62 | fdot(x) = 2 .* x 63 | function floop(x) 64 | y = similar(x) 65 | for i in eachindex(x) 66 | y[i] = 2*x[i] 67 | end 68 | return y 69 | end 70 | function floopopt(x) 71 | y = similar(x) 72 | @simd for i in eachindex(x) 73 | @inbounds y[i] = 2*x[i] 74 | end 75 | return y 76 | end 77 | 78 | x = rand(1000) 79 | @btime fmap($x) 80 | @btime fcomprehension($x) 81 | @btime fdot($x) 82 | @btime floop($x) 83 | @btime floopopt($x); 84 | 85 | 86 | function buffon(n) 87 | hit = 0 88 | for i = 1:n 89 | mp = rand() 90 | phi = (rand() * pi) - pi / 2 # angle at which needle falls 91 | xright = mp + cos(phi)/2 # x-location of needle 92 | xleft = mp - cos(phi)/2 93 | # if xright >= 1 || xleft <= 0 94 | # hit += 1 95 | # end 96 | # Does needle cross either x == 0 or x == 1? 97 | p = (xright >= 1 || xleft <= 0) ? 
1 : 0 98 | hit += p 99 | end 100 | miss = n - hit 101 | piapprox = n / hit * 2 102 | end 103 | 104 | @time buffon(1e8) 105 | # 3.399998 seconds (5 allocations: 176 bytes) 106 | #3.1413701707991235 107 | 108 | function buffon_par(n) 109 | hit = @distributed (+) for i = 1:n 110 | mp = rand() 111 | phi = (rand() * pi) - pi / 2 112 | xright = mp + cos(phi)/2 113 | xleft = mp - cos(phi)/2 114 | (xright >= 1 || xleft <= 0) ? 1 : 0 115 | end 116 | miss = n - hit 117 | piapprox = n / hit * 2 118 | end 119 | 120 | 121 | @time buffon_par(1e8) 122 | #1.536845 seconds (2.61 k allocations: 137.891 KiB) 123 | #3.141489580637215 124 | 125 | 126 | #= 127 | # Starter problems from 128 | # http://ucidatascienceinitiative.github.io/IntroToJulia/Html/BasicProblems 129 | 130 | using Statistics 131 | function binrv(n, p) 132 | nheads = 0 133 | for i=1:n 134 | u = rand() 135 | if u < p; nheads += 1; end 136 | end 137 | return nheads 138 | end 139 | ntrials = 1000 140 | counts = zeros(ntrials) 141 | n = 10; p = 0.5 142 | for i=1:ntrials 143 | counts[i] = binrv(n, p) 144 | end 145 | c = counts ./ n 146 | println("mean of $ntrials trials with pheads=$p is $(mean(c))\n") 147 | @assert isapprox(mean(c), p, atol=0.1) 148 | 149 | 150 | N = 5 151 | using SparseArrays 152 | A = spzeros(N,N) 153 | for i=1:N 154 | j=i; A[i,j]=-2 155 | if i>1; j=i-1; A[i,j]=1; end 156 | if iDistributions.pdf(dist,[x,y]), levels = [s*p]) 237 | if ~isnothing(txt); annotate!(m_x, m_y, txt); end 238 | return plt 239 | end 240 | =# 241 | 242 | 243 | 244 | #= 245 | # StatsModels.jl defines the Formula type and @formula macro 246 | # https://github.com/JuliaStats/StatsModels.jl/blob/4701a1bd221f6281371f254f69c2f95c19e02a92/src/formula.jl 247 | function make_formula_all_features(df, target) 248 | col_symbols = Set(names(df)) 249 | feature_symbols = setdiff(col_symbols, Set([target])) 250 | feature_strings = [string(s) for s in feature_symbols] 251 | all_features = join(feature_strings, " + " ) 252 | formula = string(target) * " ~ " * all_features 253 | return formula 254 | end 255 | 256 | function make_formula_test() 257 | n = 10 258 | df = DataFrame(X1=randn(n), X2=randn(n), Y=randn(n)) 259 | ols = lm(@formula(Y ~ X1 + X2), df) 260 | formula = make_formula_all_features(df, :Y) 261 | f_expr = Meta.parse(formula) 262 | ols2 = lm(@formula(f_expr), df) 263 | end 264 | =# 265 | -------------------------------------------------------------------------------- /Old/kalman_qe.jl: -------------------------------------------------------------------------------- 1 | # Slightly modified from 2 | # https://github.com/QuantEcon/QuantEcon.jl/blob/master/src/kalman.jl 3 | # To map the notation used in MLAPP ch. 18, note that 4 | # - the book uses C, the code uses G for observation model 5 | # - the book uses z, the code uses x for hidden state 6 | # - the book uses mu_{t|t-1}, the code uses cur_x_hat 7 | # - the book uses Sigma_t|t-1}, the code uses cur_sigma 8 | # ie. the state of the KF object is the posterior predictive distribution 9 | 10 | #= 11 | Implements the Kalman filter for a linear Gaussian state space model. 12 | @author : Spencer Lyon 13 | @date: 2014-07-29 14 | References 15 | ---------- 16 | https://lectures.quantecon.org/jl/kalman.html 17 | TODO: Do docstrings here after implementing LinerStateSpace 18 | =# 19 | 20 | mutable struct Kalman 21 | A # transition model 22 | G # observation model 23 | Q # transition noise 24 | R # observation noise 25 | k # num. hidden states 26 | n # num. 
observed states 27 | cur_x_hat # posterior predictive mean 28 | cur_sigma # posterior predictive covariance 29 | end 30 | 31 | 32 | # Initializes current mean and cov to zeros 33 | function Kalman(A, G, Q, R) 34 | k = size(G, 1) 35 | n = size(G, 2) 36 | xhat = n == 1 ? zero(eltype(A)) : zeros(n) 37 | Sigma = n == 1 ? zero(eltype(A)) : zeros(n, n) 38 | return Kalman(A, G, Q, R, k, n, xhat, Sigma) 39 | end 40 | 41 | 42 | function set_state!(k::Kalman, x_hat, Sigma) 43 | k.cur_x_hat = x_hat 44 | k.cur_sigma = Sigma 45 | Nothing 46 | end 47 | 48 | #= 49 | """ 50 | Updates the moments (`cur_x_hat`, `cur_sigma`) of the time ``t`` prior to the 51 | time ``t`` filtering distribution, using current measurement ``y_t``. 52 | The updates are according to 53 | ```math 54 | \hat{x}^F = \hat{x} + \Sigma G' (G \Sigma G' + R)^{-1} 55 | (y - G \hat{x}) \\ 56 | \Sigma^F = \Sigma - \Sigma G' (G \Sigma G' + R)^{-1} G 57 | \Sigma 58 | ``` 59 | #### Arguments 60 | - `k::Kalman` An instance of the Kalman filter 61 | - `y` The current measurement 62 | """ 63 | =# 64 | function prior_to_filtered!(k::Kalman, y) 65 | # simplify notation 66 | G, R = k.G, k.R 67 | x_hat, Sigma = k.cur_x_hat, k.cur_sigma 68 | 69 | # and then update 70 | if k.k > 1 71 | reshape(y, k.k, 1) 72 | end 73 | A = Sigma * G' 74 | B = G * Sigma * G' + R 75 | M = A / B 76 | k.cur_x_hat = x_hat + M * (y .- G * x_hat) 77 | k.cur_sigma = Sigma - M * G * Sigma 78 | Nothing 79 | end 80 | 81 | """ 82 | Updates the moments of the time ``t`` filtering distribution to the 83 | moments of the predictive distribution, which becomes the time 84 | ``t+1`` prior 85 | #### Arguments 86 | - `k::Kalman` An instance of the Kalman filter 87 | """ 88 | function filtered_to_forecast!(k::Kalman) 89 | # simplify notation 90 | A, Q = k.A, k.Q 91 | x_hat, Sigma = k.cur_x_hat, k.cur_sigma 92 | 93 | # and then update 94 | k.cur_x_hat = A * x_hat 95 | k.cur_sigma = A * Sigma * A' + Q 96 | Nothing 97 | end 98 | 99 | """ 100 | Updates `cur_x_hat` and `cur_sigma` given array `y` of length `k`. The full 101 | update, from one period to the next 102 | #### Arguments 103 | - `k::Kalman` An instance of the Kalman filter 104 | - `y` An array representing the current measurement 105 | """ 106 | function update!(k::Kalman, y) 107 | prior_to_filtered!(k, y) 108 | filtered_to_forecast!(k) 109 | Nothing 110 | end 111 | 112 | 113 | function stationary_values(k::Kalman) 114 | # simplify notation 115 | 116 | A, Q, G, R = k.A, k.Q, k.G, k.R 117 | 118 | # solve Riccati equation, obtain Kalman gain 119 | Sigma_inf = solve_discrete_riccati(A', G', Q, R) 120 | K_inf = A * Sigma_inf * G' * inv(G * Sigma_inf * G' .+ R) 121 | return Sigma_inf, K_inf 122 | end 123 | 124 | """ 125 | computes log-likelihood of period ``t`` 126 | ##### Arguments 127 | - `kn::Kalman`: `Kalman` specifying the model. Current values must be the 128 | forecast for period ``t`` observation conditional on ``t-1`` 129 | observation. 130 | - `y::AbstractVector`: Respondentbservations at period ``t`` 131 | ##### Returns 132 | - `logL::Real`: log-likelihood of observations at period ``t`` 133 | """ 134 | function log_likelihood(k::Kalman, y::AbstractVector) 135 | eta = y - k.G*k.cur_x_hat # forecast error 136 | P = k.G*k.cur_sigma*k.G' + k.R # covariance matrix of forecast error 137 | logL = - (length(y)*log(2pi) + logdet(P) .+ eta'/P*eta)[1]/2 138 | return logL 139 | end 140 | 141 | """ 142 | computes log-likelihood of entire observations 143 | ##### Arguments 144 | - `kn::Kalman`: `Kalman` specifying the model. 
Initial value must be the prior 145 | for t=1 period observation, i.e. ``x_{1|0}``. 146 | - `y::AbstractMatrix`: `n x T` matrix of observed data. 147 | `n` is the number of observed variables in one period. 148 | Each column is a vector of observations at each period. 149 | ##### Returns 150 | - `logL::Real`: log-likelihood of all observations 151 | """ 152 | function compute_loglikelihood(kn::Kalman, y::AbstractMatrix) 153 | T = size(y, 2) 154 | logL = 0 155 | # forecast and update 156 | for t in 1:T 157 | logL = logL + log_likelihood(kn, y[:, t]) 158 | update!(kn, y[:, t]) 159 | end 160 | return logL 161 | end 162 | 163 | """ 164 | ##### Arguments 165 | - `kn::Kalman`: `Kalman` specifying the model. Initial value must be the prior 166 | for t=1 period observation, i.e. ``x_{1|0}``. 167 | - `y::AbstractMatrix`: `n x T` matrix of observed data. 168 | `n` is the number of observed variables in one period. 169 | Each column is a vector of observations at each period. 170 | ##### Returns 171 | - `x_smoothed::AbstractMatrix`: `k x T` matrix of smoothed mean of states. 172 | `k` is the number of states. 173 | - `logL::Real`: log-likelihood of all observations 174 | - `sigma_smoothed::AbstractArray` `k x k x T` array of smoothed covariance matrix of states. 175 | """ 176 | function smooth(kn::Kalman, y::AbstractMatrix) 177 | G, R = kn.G, kn.R 178 | 179 | T = size(y, 2) 180 | n = kn.n 181 | x_filtered = Matrix{Float64}(undef, n, T) 182 | sigma_filtered = Array{Float64}(undef, n, n, T) 183 | sigma_forecast = Array{Float64}(undef, n, n, T) 184 | logL = 0 185 | # forecast and update 186 | for t in 1:T 187 | logL = logL + log_likelihood(kn, y[:, t]) 188 | prior_to_filtered!(kn, y[:, t]) 189 | x_filtered[:, t], sigma_filtered[:, :, t] = kn.cur_x_hat, kn.cur_sigma 190 | filtered_to_forecast!(kn) 191 | sigma_forecast[:, :, t] = kn.cur_sigma 192 | end 193 | # smoothing 194 | x_smoothed = copy(x_filtered) 195 | sigma_smoothed = copy(sigma_filtered) 196 | for t in (T-1):-1:1 197 | x_smoothed[:, t], sigma_smoothed[:, :, t] = 198 | go_backward(kn, x_filtered[:, t], sigma_filtered[:, :, t], 199 | sigma_forecast[:, :, t], x_smoothed[:, t+1], 200 | sigma_smoothed[:, :, t+1]) 201 | end 202 | 203 | return x_smoothed, logL, sigma_smoothed 204 | end 205 | 206 | """ 207 | ##### Arguments 208 | - `kn::Kalman`: `Kalman` specifying the model. 
209 | - `x_fi::Vector`: filtered mean of state for period ``t`` 210 | - `sigma_fi::Matrix`: filtered covariance matrix of state for period ``t`` 211 | - `sigma_fo::Matrix`: forecast of covariance matrix of state for period ``t+1`` 212 | conditional on period ``t`` observations 213 | - `x_s1::Vector`: smoothed mean of state for period ``t+1`` 214 | - `sigma_s1::Matrix`: smoothed covariance of state for period ``t+1`` 215 | ##### Returns 216 | - `x_s1::Vector`: smoothed mean of state for period ``t`` 217 | - `sigma_s1::Matrix`: smoothed covariance of state for period ``t`` 218 | """ 219 | function go_backward(k::Kalman, x_fi::Vector, 220 | sigma_fi::Matrix, sigma_fo::Matrix, 221 | x_s1::Vector, sigma_s1::Matrix) 222 | A = k.A 223 | temp = sigma_fi*A'/sigma_fo 224 | x_s = x_fi + temp*(x_s1-A*x_fi) 225 | sigma_s = sigma_fi + temp*(sigma_s1-sigma_fo)*temp' 226 | return x_s, sigma_s 227 | end 228 | -------------------------------------------------------------------------------- /Old/first-order.jl: -------------------------------------------------------------------------------- 1 | # Modified from 2 | #https://github.com/sisl/algforopt-notebooks/blob/master/first-order.ipynb 3 | 4 | # See also 5 | # https://github.com/FluxML/Flux.jl/blob/master/src/optimise/optimisers.jl 6 | 7 | #include("support_code.jl"); 8 | #using Plots 9 | #using Vec 10 | include("utils.jl") 11 | 12 | # A4O p36 13 | function bracket_minimum(f, x=0; s=1e-2, k=2.0) 14 | a, ya = x, f(x) 15 | b, yb = a + s, f(a + s) 16 | if yb > ya 17 | a, b = b, a 18 | ya, yb = yb, ya 19 | s = -s 20 | end 21 | while true 22 | c, yc = b + s, f(b + s) 23 | if yc > yb 24 | return a < c ? (a, c) : (c, a) 25 | end 26 | a, ya, b, yb = b, yb, c, yc 27 | s *= k 28 | end 29 | end 30 | 31 | # A4O p92 32 | # 1d optimization based on Newton's method 33 | function secant_method(df, x1, x2, ϵ) 34 | df1 = df(x1) 35 | delta = Inf 36 | while abs(delta) > ϵ 37 | df2 = df(x2) 38 | delta = (x2 - x1)/(df2 - df1)*df2 39 | x1, x2, df1 = x2, x2 - delta, df2 40 | end 41 | x2 42 | end 43 | 44 | # A4O p54 45 | function line_search_secant(f, dfn, x, d) 46 | objective = α -> f(x + α*d) 47 | a, b = bracket_minimum(objective) 48 | #α = minimize(objective, a, b) 49 | f1d = a -> begin 50 | xnew = x + a*d 51 | g2 = dfn(xnew) 52 | v1 = dot(g2, d) / norm(d) 53 | return v1 54 | end 55 | α = secant_method(f1d, a, b, 0.0001) 56 | #println("ls secant chose $α") 57 | return α 58 | end 59 | 60 | 61 | 62 | # A40 p56 63 | using LinearAlgebra 64 | function line_search_backtracking(f, ∇f, x, d; α=1.0, p=0.5, β=1e-4, max_iter=50) 65 | y, g = f(x), ∇f(x) 66 | iter = 1 67 | while (iter <= max_iter) && (f(x + α*d) > y + β*α*(g⋅d)) 68 | α *= p 69 | #println("line search iter $iter, $α") 70 | iter += 1 71 | end 72 | #println("backtracking ls chose $α") 73 | return α 74 | end 75 | 76 | import LineSearches 77 | #https://julianlsolvers.github.io/LineSearches.jl/latest/examples/generated/customoptimizer.html 78 | function line_search_backtracking_optim(f, g, x, d) 79 | linesearch_fn = LineSearches.BackTracking(order=3) 80 | #linesearch_fn = LineSearches.HagerZhang() 81 | s = d 82 | ϕ(α) = f(x .+ α.*s) 83 | function dϕ(α) 84 | gvec = g(x .+ α.*s) 85 | return dot(gvec, s) 86 | end 87 | function ϕdϕ(α) 88 | phi = f(x .+ α.*s) 89 | gvec = g(x .+ α.*s) 90 | dphi = dot(gvec, s) 91 | return (phi, dphi) 92 | end 93 | fx = f(x) 94 | gvec = g(x) 95 | dϕ_0 = dot(s, gvec) 96 | α, fx = linesearch_fn(ϕ, dϕ, ϕdϕ, 1.0, fx, dϕ_0) 97 | #println("optim ls chose $α") 98 | return α 99 | end 100 | 101 | 102 | 
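# A minimal sketch of how the helpers above fit together, assuming this file
# has been included (f1, f, x0 and the 1e-6 tolerance are placeholder choices
# for illustration): bracket a 1-d minimum, refine it with the secant method,
# then take one backtracking line-search step along the steepest-descent
# direction.
#=
f1(a) = (a - 2)^2                      # 1-d objective, minimum at a = 2
df1(a) = 2 * (a - 2)
a_lo, a_hi = bracket_minimum(f1)       # interval containing the minimizer
a_star = secant_method(df1, a_lo, a_hi, 1e-6)    # ≈ 2.0

f(x) = sum(abs2, x)                    # multivariate objective, gradient 2x
∇f(x) = 2 .* x
x0 = [1.0, -2.0]
d = -∇f(x0)                            # steepest-descent direction
α = line_search_backtracking(f, ∇f, x0, d)       # step with sufficient decrease
x1 = x0 + α*d                          # f(x1) < f(x0)
=#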
################# 103 | 104 | 105 | abstract type DescentMethod end 106 | 107 | struct GradientDescent <: DescentMethod 108 | α::Real # learning rate 109 | end 110 | function init!(M::GradientDescent, f, ∇f, x) 111 | return M 112 | end 113 | function step!(M::GradientDescent, f, ∇f, x) 114 | α, g = M.α, ∇f(x) 115 | return x - α*g 116 | end 117 | 118 | 119 | struct GradientDescentLineSearch <: DescentMethod 120 | end 121 | 122 | function init!(M::GradientDescentLineSearch, f, ∇f, x) 123 | return M 124 | end 125 | function step!(M::GradientDescentLineSearch, f, ∇f, x) 126 | d = -normalize(∇f(x)) 127 | α = line_search_backtracking(f, ∇f, x, d) 128 | return x + α*d 129 | end 130 | 131 | struct GradientDescentLineSearchExact <: DescentMethod 132 | end 133 | 134 | function init!(M::GradientDescentLineSearchExact, f, ∇f, x) 135 | return M 136 | end 137 | function step!(M::GradientDescentLineSearchExact, f, ∇f, x) 138 | d = -normalize(∇f(x)) 139 | α = line_search_secant(f, ∇f, x, d) 140 | return x + α*d 141 | end 142 | 143 | mutable struct Momentum <: DescentMethod 144 | α::Real # learning rate 145 | β::Real # momentum decay 146 | v::Vector{Real} # momentum 147 | end 148 | # default constructor 149 | Momentum(α) = Momentum(α, 0.9, Vector{Real}()) 150 | 151 | function init!(M::Momentum, f, ∇f, x) 152 | M.v = zeros(length(x)) 153 | return M 154 | end 155 | function step!(M::Momentum, f, ∇f, x) 156 | α, β, v, g = M.α, M.β, M.v, ∇f(x) 157 | v[:] = β*v - α*g 158 | return x + v 159 | end 160 | 161 | mutable struct NesterovMomentum <: DescentMethod 162 | α::Real # learning rate 163 | β::Real # momentum decay 164 | v::Vector{Real} # momentum 165 | end 166 | # default constructor 167 | NesterovMomentum(α) = NesterovMomentum(α, 0.9, Vector{Real}()) 168 | 169 | function init!(M::NesterovMomentum, f, ∇f, x) 170 | M.v = zeros(length(x)) 171 | return M 172 | end 173 | function step!(M::NesterovMomentum, f, ∇f, x) 174 | α, β, v = M.α, M.β, M.v 175 | v[:] = β*v - α*∇f(x + β*v) 176 | return x + v 177 | end 178 | 179 | 180 | mutable struct Adagrad <: DescentMethod 181 | α::Real # learning rate 182 | ϵ::Real # small value 183 | s::Vector{Real} # sum of squared gradient 184 | end 185 | # default constructor 186 | Adagrad(α) = Adagrad(α, 1e-8, Vector{Real}()) 187 | 188 | function init!(M::Adagrad, f, ∇f, x) 189 | M.s = zeros(length(x)) 190 | return M 191 | end 192 | 193 | function step!(M::Adagrad, f, ∇f, x) 194 | α, ϵ, s, g = M.α, M.ϵ, M.s, ∇f(x) 195 | s[:] += g.*g 196 | return x - α*g ./ (sqrt.(s) .+ ϵ) 197 | end 198 | 199 | mutable struct RMSProp <: DescentMethod 200 | α::Real # learning rate 201 | γ::Real # decay 202 | ϵ::Real # small value 203 | s::Vector{Real} # sum of squared gradient 204 | end 205 | #default constructor 206 | RMSProp(α) = RMSProp(α, 0.9, 1e-8, Vector{Real}()) 207 | 208 | function init!(M::RMSProp, f, ∇f, x) 209 | M.s = zeros(length(x)) 210 | return M 211 | end 212 | function step!(M::RMSProp, f, ∇f, x) 213 | α, γ, ϵ, s, g = M.α, M.γ, M.ϵ, M.s, ∇f(x) 214 | s[:] = γ*s + (1-γ)*(g.*g) 215 | return x - α*g ./ (sqrt.(s) .+ ϵ) 216 | end 217 | 218 | mutable struct Adadelta <: DescentMethod 219 | γs::Real # gradient decay 220 | γx::Real # update decay 221 | ϵ::Real # small value 222 | s::Vector{Real} # sum of squared gradients 223 | u::Vector{Real} # sum of squared updates 224 | end 225 | # default constructor 226 | Adadelta() = Adadelta(0.9, 0.9, 1e-8, Vector{Real}(), Vector{Real}()) 227 | 228 | function init!(M::Adadelta, f, ∇f, x) 229 | M.s = zeros(length(x)) 230 | M.u = zeros(length(x)) 231 | 
return M 232 | end 233 | function step!(M::Adadelta, f, ∇f, x) 234 | γs, γx, ϵ, s, u, g = M.γs, M.γx, M.ϵ, M.s, M.u, ∇f(x) 235 | s[:] = γs*s + (1-γs)*g.*g 236 | Δx = - (sqrt.(u) .+ ϵ) ./ (sqrt.(s) .+ ϵ) .* g 237 | u[:] = γx*u + (1-γx)*Δx.*Δx 238 | return x + Δx 239 | end 240 | 241 | mutable struct Adam <: DescentMethod 242 | α::Real # learning rate 243 | γv::Real # decay 244 | γs::Real # decay 245 | ϵ::Real # small value 246 | k::Int # step counter 247 | v::Vector{Real} # 1st moment estimate 248 | s::Vector{Real} # 2nd moment estimate 249 | end 250 | # default constructor 251 | Adam(α) = Adam(α, 0.9, 0.999, 1e-8, 0, Vector{Real}(), Vector{Real}()) 252 | 253 | function init!(M::Adam, f, ∇f, x) 254 | M.k = 0 255 | M.v = zeros(length(x)) 256 | M.s = zeros(length(x)) 257 | return M 258 | end 259 | function step!(M::Adam, f, ∇f, x) 260 | α, γv, γs, ϵ, k = M.α, M.γv, M.γs, M.ϵ, M.k 261 | s, v, g = M.s, M.v, ∇f(x) 262 | v[:] = γv*v + (1-γv)*g 263 | s[:] = γs*s + (1-γs)*g.*g 264 | M.k = k += 1 265 | v_hat = v ./ (1 - γv^k) 266 | s_hat = s ./ (1 - γs^k) 267 | return x - α*v_hat ./ (sqrt.(s_hat) .+ ϵ) 268 | end 269 | 270 | mutable struct HyperGradientDescent <: DescentMethod 271 | α0::Real # initial learning rate 272 | μ::Real # learning rate of the learning rate 273 | α::Real # current learning rate 274 | g_prev::Vector{Real} # previous gradient 275 | end 276 | # default constructor 277 | HyperGradientDescent(α0, μ) = HyperGradientDescent(α0, μ, NaN, Vector{Real}()) 278 | 279 | function init!(M::HyperGradientDescent, f, ∇f, x) 280 | M.α = M.α0 281 | M.g_prev = zeros(length(x)) 282 | return M 283 | end 284 | function step!(M::HyperGradientDescent, f, ∇f, x) 285 | α, μ, g, g_prev = M.α, M.μ, ∇f(x), M.g_prev 286 | α = α + μ*(g⋅g_prev) 287 | M.g_prev, M.α = g, α 288 | return x - α*g 289 | end 290 | 291 | 292 | ##### 293 | 294 | function run_descent_method(M::DescentMethod, f, ∇f, x0; 295 | max_iter = 100, 296 | callback = (M, f, ∇f, x, iter) -> norm(∇f(x), Inf) < 1e-4) 297 | x = x0 298 | done = false 299 | init!(M, f, ∇f, x) 300 | for iter = 1:max_iter 301 | x = step!(M, f, ∇f, x); 302 | done = callback(M, f, ∇f, x, iter) 303 | if done; break; end 304 | end 305 | return x 306 | end 307 | 308 | #################### 309 | -------------------------------------------------------------------------------- /Old/code_breaker.jl: -------------------------------------------------------------------------------- 1 | 2 | # See the explanation in this doc: 3 | #https://docs.google.com/document/d/1nTpmZhs-VxpJivrQKIVnvmelc02J0F1ki5ChU0EyG68/edit?usp=sharing 4 | 5 | include("utils.jl") 6 | 7 | """ 8 | make_code_prior(L, A, partial_code) 9 | 10 | Make uniform prior over codewords of length `L` over alphabet `A` given a 11 | `partial_code` vector, which is 0 if element is unknown, and otherwise 12 | contains true value of code at that location. 
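For example, with `L = 2`, `A = 3` and `partial_code = [0, 2]`, the prior puts
mass 1/3 on each of the compatible codes (1,2), (2,2) and (3,2), and zero mass
elsewhere (see `make_code_prior_test` below).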
13 | """ 14 | function make_code_prior(L, A, partial_code) 15 | shape = Tuple(fill(A, L)) 16 | ncodes = A^L 17 | codes = [Base._ind2sub(shape, i) for i in 1:ncodes] 18 | prior = ones(ncodes) 19 | for i = 1:ncodes 20 | for l=1:L 21 | if partial_code[l]>0 && (codes[i][l] != partial_code[l]) 22 | prior[i] = 0 23 | end 24 | end 25 | end 26 | prior = normalize_probdist(prior); 27 | return prior 28 | end 29 | 30 | function make_code_prior_test() 31 | L = 2; A = 3; 32 | partial_code = [0, 2] 33 | code_prior = make_code_prior(L, A, partial_code) 34 | #= Possible codes are as follows, ones compatible with partial_code with * 35 | 1 1 36 | 2 1 37 | 3 1 38 | 1 2 * 39 | 2 2 * 40 | 3 2 * 41 | 1 3 42 | 2 3 43 | 3 3 44 | =# 45 | @test isapprox(code_prior, [0, 0, 0, 0.3, 0.3, 0.3, 0, 0, 0]; atol=1e-1) 46 | end 47 | 48 | #make_code_prior_test() 49 | 50 | function encode_discrete(v, domain) 51 | ndx = findfirst(x -> x==v, domain) 52 | @assert !isnothing(ndx) "value $v not in domain $domain" 53 | return ndx 54 | end 55 | 56 | function hamming_distance(x, c) 57 | return sum(x .!= c) 58 | end 59 | 60 | function hamming_distance_ints(xndx, cndx, L, A) 61 | shape = Tuple(fill(A, L)) 62 | xbits = Base._ind2sub(shape, xndx) 63 | cbits = Base._ind2sub(shape, cndx) 64 | return hamming_distance(xbits, cbits) 65 | end 66 | 67 | 68 | """ 69 | make_CPT_from_fn(f, domain1, domain2, frange) 70 | 71 | Make a conditional proability table from a deterministic 2 argument function 72 | `f` with inputs x1 in `domain1`, x2 in `domain2` and output y in `frange`. 73 | 74 | We assume the domains and range are finite sets. Returns array of form 75 | cpt[x1,x2,y], where the last dimension sums to 1 for each x1, x2. 76 | """ 77 | function make_CPT_from_fn(f, domain1, domain2, frange) 78 | cpt = zeros(length(domain1), length(domain2), length(frange)) 79 | for (x1v, x1i) in enumerate(domain1) 80 | for (x2v, x2i) in enumerate(domain2) 81 | yv = f(x1v, x2v) 82 | yi = encode_discrete(yv, frange) 83 | cpt[x1i, x2i, yi] = 1.0 84 | end 85 | end 86 | return cpt 87 | end 88 | 89 | 90 | function make_hamming_CPT(L, A) 91 | domain = 1:A^L # strings of length L on alphabet A 92 | frange = 0:L # possible values of hamming distance 93 | f(x1, x2) = hamming_distance_ints(x1, x2, L, A) 94 | CPT = make_CPT_from_fn(f, domain, domain, frange) 95 | return CPT 96 | end 97 | 98 | function make_hamming_CPT_test() 99 | L = 2; A = 3; 100 | CPT = make_hamming_CPT(L, A) 101 | @test CPT[:,1,:] == [ 102 | 1 0 0; # hamming([1,1], [1,1])=1 103 | 0 1 0; 104 | 0 1 0; 105 | 0 1 0; 106 | 0 0 1; # hamming([2,2], [1,1])=2 107 | 0 0 1; 108 | 0 1 0; 109 | 0 0 1; 110 | 0 0 1 111 | ] 112 | end 113 | 114 | """ 115 | marginalize_CPT_parent(child_CPT, parent_prior) 116 | 117 | Compute cpt(p1, c) = sum_p2 child_CPT(p1, p2, c) * parent_prior(p1). 118 | """ 119 | function marginalize_CPT_parent(child_CPT, parent_prior) 120 | @assert ndims(child_CPT) == 3 121 | (p1sz, p2sz, csz) = size(child_CPT) 122 | ppsz = length(parent_prior) 123 | errmsg = "parent prior has size $ppsz, should be $p2sz" 124 | @assert (length(parent_prior) == p2sz) errmsg 125 | CPT = zeros(p1sz, csz) 126 | for p1 in 1:p1sz 127 | for c in 1:csz 128 | CPT[p1, c] = sum(child_CPT[p1, :, c] .* parent_prior) 129 | end 130 | end 131 | return CPT 132 | end 133 | 134 | """ 135 | We make a surrogate model for the function y=f(x), 136 | where x is a string in {1,..,A}^L. 
Our surrogate has the form 137 | fhat(x) = argmax_y CPT_y_x[x, y] where 138 | CPT_y_x[x,y] = sum_c CPT_c[c] * CPT_y_xc[x,c,y] 139 | where CPT_c is a prior over hidden code c. 140 | We assume f(x) in {0,1,...,L}. 141 | """ 142 | mutable struct SurrogateModel 143 | CPT_c::Array{Float64,1} 144 | CPT_y_xc::Array{Float64, 3} 145 | CPT_y_x::Array{Float64, 2} 146 | L::Int # length of string 147 | A::Int # alphabet size 148 | fdom::Array{Int,1} # domain of unknown function 149 | frange::Array{Int,1} # range of unknown function 150 | end 151 | 152 | 153 | function encode_x(model, xstr) 154 | xdomain = Tuple(fill(model.A, model.L)) 155 | xndx = Base._sub2ind(xdomain, xstr...) 156 | return xndx 157 | end 158 | 159 | function decode_x(model, xndx) 160 | xdomain = Tuple(fill(model.A, model.L)) 161 | xstr = Base._ind2sub(xdomain, xndx) 162 | return xstr 163 | end 164 | 165 | function encode_y(model, yval) 166 | yndx = encode_discrete(yval, model.frange) 167 | end 168 | 169 | 170 | function make_surrogate(L, A, partial_code) 171 | CPT_c = make_code_prior(L, A, partial_code) 172 | CPT_y_xc = make_hamming_CPT(L, A) 173 | CPT_y_x = marginalize_CPT_parent(CPT_y_xc, CPT_c) 174 | model = SurrogateModel(CPT_c, CPT_y_xc, CPT_y_x, L, A, 1:A^L, 0:L) 175 | return model 176 | end 177 | 178 | function make_surrogate_test() 179 | L = 3; A = 4; partial_code = [1,1,1] 180 | model = make_surrogate(L, A, partial_code) 181 | # Prob the model with query strings x which are increasignly 182 | # far (in Hamming Distance) from the true code of (1,1,1) 183 | xs = [ [1,1,1], [2,1,1], [2,2,1], [2,2,2] ] 184 | ps = [ [1.0,0,0,0], [0,1.0,0,0], [0,0,1.0,0], [0,0,0,1.0]] # 0-3 bits away 185 | for i = 1:length(xs) 186 | xstr = xs[i] 187 | prob_y = ps[i] 188 | xndx = encode_x(model, xstr) 189 | CPT_y = model.CPT_y_x[xndx,:] 190 | @test isapprox(CPT_y, prob_y; atol=0.1) 191 | end 192 | println("tests passed") 193 | end 194 | 195 | function update_surrogate!(model, xstr, yval) 196 | xdomain = fill(model.A, model.L) 197 | xndx = encode_x(model, xstr) 198 | yndx = encode_y(model, yval) 199 | lik_c = model.CPT_y_xc[xndx, :, yndx] 200 | post_c = normalize_probdist(lik_c .* model.CPT_c) 201 | model.CPT_c = post_c 202 | model.CPT_y_x = marginalize_CPT_parent(model.CPT_y_xc, model.CPT_c) 203 | end 204 | 205 | function update_surrogate_test() 206 | # Start with uniform prior. 
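    # (partial_code = [0,0,0], so each of the A^L candidate codes gets prior mass 1/A^L.)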
207 | # Make a lucky guess, which has f(x)=0, which implies code=x 208 | xs = [ [1,1,1], [1,2,3]] 209 | for xstr in xs 210 | L = 3; A = 4; partial_code = [0,0,0] 211 | model = make_surrogate(L, A, partial_code) 212 | xcode = encode_x(model, xstr) 213 | yval = 0 214 | update_surrogate!(model, xstr, yval) 215 | expected_post = zeros(A^L); 216 | expected_post[xcode] = 1.0 217 | @test isapprox(model.CPT_c, expected_post, atol=0.1) 218 | end 219 | println("tests passed") 220 | end 221 | 222 | 223 | function expected_improvement(model, xstr, incumbent_val) 224 | xndx = encode_x(model, xstr) 225 | CPT_y = model.CPT_y_x[xndx,:] 226 | fn(y) = max(0, incumbent_val-y) # if y, rounded corners=5mm}}") 39 | pushPGFPlotsPreamble("\\newcommand{\\vect}[1]{\\boldsymbol{\\mathbf{#1}}}") 40 | pushPGFPlotsPreamble("\\pgfplotsset{") 41 | pushPGFPlotsPreamble(" colormap={pasteljet}{") 42 | pushPGFPlotsPreamble(" rgb=(0.99325,0.90616,0.14394)") 43 | pushPGFPlotsPreamble(" rgb=(0.98387,0.90487,0.13690)") 44 | pushPGFPlotsPreamble(" rgb=(0.97442,0.90359,0.13021)") 45 | pushPGFPlotsPreamble(" rgb=(0.96489,0.90232,0.12394)") 46 | pushPGFPlotsPreamble(" rgb=(0.95530,0.90107,0.11813)") 47 | pushPGFPlotsPreamble(" rgb=(0.94564,0.89982,0.11284)") 48 | pushPGFPlotsPreamble(" rgb=(0.93590,0.89857,0.10813)") 49 | pushPGFPlotsPreamble(" rgb=(0.92611,0.89733,0.10407)") 50 | pushPGFPlotsPreamble(" rgb=(0.91624,0.89609,0.10072)") 51 | pushPGFPlotsPreamble(" rgb=(0.90631,0.89485,0.09813)") 52 | pushPGFPlotsPreamble(" rgb=(0.89632,0.89362,0.09634)") 53 | pushPGFPlotsPreamble(" rgb=(0.88627,0.89237,0.09537)") 54 | pushPGFPlotsPreamble(" rgb=(0.87617,0.89112,0.09525)") 55 | pushPGFPlotsPreamble(" rgb=(0.86601,0.88987,0.09595)") 56 | pushPGFPlotsPreamble(" rgb=(0.85581,0.88860,0.09745)") 57 | pushPGFPlotsPreamble(" rgb=(0.84556,0.88732,0.09970)") 58 | pushPGFPlotsPreamble(" rgb=(0.83527,0.88603,0.10265)") 59 | pushPGFPlotsPreamble(" rgb=(0.82494,0.88472,0.10622)") 60 | pushPGFPlotsPreamble(" rgb=(0.81458,0.88339,0.11035)") 61 | pushPGFPlotsPreamble(" rgb=(0.80418,0.88205,0.11496)") 62 | pushPGFPlotsPreamble(" rgb=(0.79376,0.88068,0.12001)") 63 | pushPGFPlotsPreamble(" rgb=(0.78331,0.87928,0.12540)") 64 | pushPGFPlotsPreamble(" rgb=(0.77285,0.87787,0.13111)") 65 | pushPGFPlotsPreamble(" rgb=(0.76237,0.87642,0.13706)") 66 | pushPGFPlotsPreamble(" rgb=(0.75188,0.87495,0.14323)") 67 | pushPGFPlotsPreamble(" rgb=(0.74139,0.87345,0.14956)") 68 | pushPGFPlotsPreamble(" rgb=(0.73089,0.87192,0.15603)") 69 | pushPGFPlotsPreamble(" rgb=(0.72039,0.87035,0.16260)") 70 | pushPGFPlotsPreamble(" rgb=(0.70990,0.86875,0.16926)") 71 | pushPGFPlotsPreamble(" rgb=(0.69942,0.86712,0.17597)") 72 | pushPGFPlotsPreamble(" rgb=(0.68894,0.86545,0.18272)") 73 | pushPGFPlotsPreamble(" rgb=(0.67849,0.86374,0.18950)") 74 | pushPGFPlotsPreamble(" rgb=(0.66805,0.86200,0.19629)") 75 | pushPGFPlotsPreamble(" rgb=(0.65764,0.86022,0.20308)") 76 | pushPGFPlotsPreamble(" rgb=(0.64726,0.85840,0.20986)") 77 | pushPGFPlotsPreamble(" rgb=(0.63690,0.85654,0.21662)") 78 | pushPGFPlotsPreamble(" rgb=(0.62658,0.85464,0.22335)") 79 | pushPGFPlotsPreamble(" rgb=(0.61629,0.85271,0.23005)") 80 | pushPGFPlotsPreamble(" rgb=(0.60604,0.85073,0.23671)") 81 | pushPGFPlotsPreamble(" rgb=(0.59584,0.84872,0.24333)") 82 | pushPGFPlotsPreamble(" rgb=(0.58568,0.84666,0.24990)") 83 | pushPGFPlotsPreamble(" rgb=(0.57556,0.84457,0.25642)") 84 | pushPGFPlotsPreamble(" rgb=(0.56550,0.84243,0.26288)") 85 | pushPGFPlotsPreamble(" rgb=(0.55548,0.84025,0.26928)") 86 | pushPGFPlotsPreamble(" 
rgb=(0.54552,0.83804,0.27563)") 87 | pushPGFPlotsPreamble(" rgb=(0.53562,0.83579,0.28191)") 88 | pushPGFPlotsPreamble(" rgb=(0.52578,0.83349,0.28813)") 89 | pushPGFPlotsPreamble(" rgb=(0.51599,0.83116,0.29428)") 90 | pushPGFPlotsPreamble(" rgb=(0.50627,0.82879,0.30036)") 91 | pushPGFPlotsPreamble(" rgb=(0.49661,0.82638,0.30638)") 92 | pushPGFPlotsPreamble(" rgb=(0.48703,0.82393,0.31232)") 93 | pushPGFPlotsPreamble(" rgb=(0.47750,0.82144,0.31820)") 94 | pushPGFPlotsPreamble(" rgb=(0.46805,0.81892,0.32400)") 95 | pushPGFPlotsPreamble(" rgb=(0.45867,0.81636,0.32973)") 96 | pushPGFPlotsPreamble(" rgb=(0.44937,0.81377,0.33538)") 97 | pushPGFPlotsPreamble(" rgb=(0.44014,0.81114,0.34097)") 98 | pushPGFPlotsPreamble(" rgb=(0.43098,0.80847,0.34648)") 99 | pushPGFPlotsPreamble(" rgb=(0.42191,0.80577,0.35191)") 100 | pushPGFPlotsPreamble(" rgb=(0.41291,0.80304,0.35727)") 101 | pushPGFPlotsPreamble(" rgb=(0.40400,0.80027,0.36255)") 102 | pushPGFPlotsPreamble(" rgb=(0.39517,0.79748,0.36776)") 103 | pushPGFPlotsPreamble(" rgb=(0.38643,0.79464,0.37289)") 104 | pushPGFPlotsPreamble(" rgb=(0.37778,0.79178,0.37794)") 105 | pushPGFPlotsPreamble(" rgb=(0.36921,0.78889,0.38291)") 106 | pushPGFPlotsPreamble(" rgb=(0.36074,0.78596,0.38781)") 107 | pushPGFPlotsPreamble(" rgb=(0.35236,0.78301,0.39264)") 108 | pushPGFPlotsPreamble(" rgb=(0.34407,0.78003,0.39738)") 109 | pushPGFPlotsPreamble(" rgb=(0.33588,0.77702,0.40205)") 110 | pushPGFPlotsPreamble(" rgb=(0.32780,0.77398,0.40664)") 111 | pushPGFPlotsPreamble(" rgb=(0.31981,0.77091,0.41115)") 112 | pushPGFPlotsPreamble(" rgb=(0.31193,0.76782,0.41559)") 113 | pushPGFPlotsPreamble(" rgb=(0.30415,0.76470,0.41994)") 114 | pushPGFPlotsPreamble(" rgb=(0.29648,0.76156,0.42422)") 115 | pushPGFPlotsPreamble(" rgb=(0.28892,0.75839,0.42843)") 116 | pushPGFPlotsPreamble(" rgb=(0.28148,0.75520,0.43255)") 117 | pushPGFPlotsPreamble(" rgb=(0.27415,0.75199,0.43660)") 118 | pushPGFPlotsPreamble(" rgb=(0.26694,0.74875,0.44057)") 119 | pushPGFPlotsPreamble(" rgb=(0.25986,0.74549,0.44447)") 120 | pushPGFPlotsPreamble(" rgb=(0.25290,0.74221,0.44828)") 121 | pushPGFPlotsPreamble(" rgb=(0.24607,0.73891,0.45202)") 122 | pushPGFPlotsPreamble(" rgb=(0.23937,0.73559,0.45569)") 123 | pushPGFPlotsPreamble(" rgb=(0.23281,0.73225,0.45928)") 124 | pushPGFPlotsPreamble(" rgb=(0.22640,0.72889,0.46279)") 125 | pushPGFPlotsPreamble(" rgb=(0.22012,0.72551,0.46623)") 126 | pushPGFPlotsPreamble(" rgb=(0.21400,0.72211,0.46959)") 127 | pushPGFPlotsPreamble(" rgb=(0.20803,0.71870,0.47287)") 128 | pushPGFPlotsPreamble(" rgb=(0.20222,0.71527,0.47608)") 129 | pushPGFPlotsPreamble(" rgb=(0.19657,0.71183,0.47922)") 130 | pushPGFPlotsPreamble(" rgb=(0.19109,0.70837,0.48228)") 131 | pushPGFPlotsPreamble(" rgb=(0.18578,0.70489,0.48527)") 132 | pushPGFPlotsPreamble(" rgb=(0.18065,0.70140,0.48819)") 133 | pushPGFPlotsPreamble(" rgb=(0.17571,0.69790,0.49103)") 134 | pushPGFPlotsPreamble(" rgb=(0.17095,0.69438,0.49380)") 135 | pushPGFPlotsPreamble(" rgb=(0.16638,0.69086,0.49650)") 136 | pushPGFPlotsPreamble(" rgb=(0.16202,0.68732,0.49913)") 137 | pushPGFPlotsPreamble(" rgb=(0.15785,0.68376,0.50169)") 138 | pushPGFPlotsPreamble(" rgb=(0.15389,0.68020,0.50417)") 139 | pushPGFPlotsPreamble(" rgb=(0.15015,0.67663,0.50659)") 140 | pushPGFPlotsPreamble(" rgb=(0.14662,0.67305,0.50894)") 141 | pushPGFPlotsPreamble(" rgb=(0.14330,0.66946,0.51121)") 142 | pushPGFPlotsPreamble(" rgb=(0.14021,0.66586,0.51343)") 143 | pushPGFPlotsPreamble(" rgb=(0.13734,0.66225,0.51557)") 144 | pushPGFPlotsPreamble(" 
rgb=(0.13469,0.65864,0.51765)") 145 | pushPGFPlotsPreamble(" rgb=(0.13227,0.65501,0.51966)") 146 | pushPGFPlotsPreamble(" rgb=(0.13007,0.65138,0.52161)") 147 | pushPGFPlotsPreamble(" rgb=(0.12809,0.64775,0.52349)") 148 | pushPGFPlotsPreamble(" rgb=(0.12633,0.64411,0.52531)") 149 | pushPGFPlotsPreamble(" rgb=(0.12478,0.64046,0.52707)") 150 | pushPGFPlotsPreamble(" rgb=(0.12344,0.63681,0.52876)") 151 | pushPGFPlotsPreamble(" rgb=(0.12231,0.63315,0.53040)") 152 | pushPGFPlotsPreamble(" rgb=(0.12138,0.62949,0.53197)") 153 | pushPGFPlotsPreamble(" rgb=(0.12064,0.62583,0.53349)") 154 | pushPGFPlotsPreamble(" rgb=(0.12008,0.62216,0.53495)") 155 | pushPGFPlotsPreamble(" rgb=(0.11970,0.61849,0.53635)") 156 | pushPGFPlotsPreamble(" rgb=(0.11948,0.61482,0.53769)") 157 | pushPGFPlotsPreamble(" rgb=(0.11942,0.61114,0.53898)") 158 | pushPGFPlotsPreamble(" rgb=(0.11951,0.60746,0.54022)") 159 | pushPGFPlotsPreamble(" rgb=(0.11974,0.60379,0.54140)") 160 | pushPGFPlotsPreamble(" rgb=(0.12009,0.60010,0.54253)") 161 | pushPGFPlotsPreamble(" rgb=(0.12057,0.59642,0.54361)") 162 | pushPGFPlotsPreamble(" rgb=(0.12115,0.59274,0.54464)") 163 | pushPGFPlotsPreamble(" rgb=(0.12183,0.58905,0.54562)") 164 | pushPGFPlotsPreamble(" rgb=(0.12261,0.58537,0.54656)") 165 | pushPGFPlotsPreamble(" rgb=(0.12346,0.58169,0.54744)") 166 | pushPGFPlotsPreamble(" rgb=(0.12440,0.57800,0.54829)") 167 | pushPGFPlotsPreamble(" rgb=(0.12539,0.57432,0.54909)") 168 | pushPGFPlotsPreamble(" rgb=(0.12645,0.57063,0.54984)") 169 | pushPGFPlotsPreamble(" rgb=(0.12757,0.56695,0.55056)") 170 | pushPGFPlotsPreamble(" rgb=(0.12873,0.56327,0.55123)") 171 | pushPGFPlotsPreamble(" rgb=(0.12993,0.55958,0.55186)") 172 | pushPGFPlotsPreamble(" rgb=(0.13117,0.55590,0.55246)") 173 | pushPGFPlotsPreamble(" rgb=(0.13244,0.55222,0.55302)") 174 | pushPGFPlotsPreamble(" rgb=(0.13374,0.54853,0.55354)") 175 | pushPGFPlotsPreamble(" rgb=(0.13507,0.54485,0.55403)") 176 | pushPGFPlotsPreamble(" rgb=(0.13641,0.54117,0.55448)") 177 | pushPGFPlotsPreamble(" rgb=(0.13777,0.53749,0.55491)") 178 | pushPGFPlotsPreamble(" rgb=(0.13915,0.53381,0.55530)") 179 | pushPGFPlotsPreamble(" rgb=(0.14054,0.53013,0.55566)") 180 | pushPGFPlotsPreamble(" rgb=(0.14194,0.52645,0.55599)") 181 | pushPGFPlotsPreamble(" rgb=(0.14334,0.52277,0.55629)") 182 | pushPGFPlotsPreamble(" rgb=(0.14476,0.51909,0.55657)") 183 | pushPGFPlotsPreamble(" rgb=(0.14618,0.51541,0.55682)") 184 | pushPGFPlotsPreamble(" rgb=(0.14761,0.51173,0.55705)") 185 | pushPGFPlotsPreamble(" rgb=(0.14904,0.50805,0.55725)") 186 | pushPGFPlotsPreamble(" rgb=(0.15048,0.50437,0.55743)") 187 | pushPGFPlotsPreamble(" rgb=(0.15192,0.50069,0.55759)") 188 | pushPGFPlotsPreamble(" rgb=(0.15336,0.49700,0.55772)") 189 | pushPGFPlotsPreamble(" rgb=(0.15482,0.49331,0.55784)") 190 | pushPGFPlotsPreamble(" rgb=(0.15627,0.48962,0.55794)") 191 | pushPGFPlotsPreamble(" rgb=(0.15773,0.48593,0.55801)") 192 | pushPGFPlotsPreamble(" rgb=(0.15919,0.48224,0.55807)") 193 | pushPGFPlotsPreamble(" rgb=(0.16067,0.47854,0.55812)") 194 | pushPGFPlotsPreamble(" rgb=(0.16214,0.47484,0.55814)") 195 | pushPGFPlotsPreamble(" rgb=(0.16362,0.47113,0.55815)") 196 | pushPGFPlotsPreamble(" rgb=(0.16512,0.46742,0.55814)") 197 | pushPGFPlotsPreamble(" rgb=(0.16662,0.46371,0.55812)") 198 | pushPGFPlotsPreamble(" rgb=(0.16813,0.45999,0.55808)") 199 | pushPGFPlotsPreamble(" rgb=(0.16965,0.45626,0.55803)") 200 | pushPGFPlotsPreamble(" rgb=(0.17118,0.45253,0.55797)") 201 | pushPGFPlotsPreamble(" rgb=(0.17272,0.44879,0.55788)") 202 | pushPGFPlotsPreamble(" 
rgb=(0.17427,0.44504,0.55779)") 203 | pushPGFPlotsPreamble(" rgb=(0.17584,0.44129,0.55768)") 204 | pushPGFPlotsPreamble(" rgb=(0.17742,0.43753,0.55756)") 205 | pushPGFPlotsPreamble(" rgb=(0.17902,0.43376,0.55743)") 206 | pushPGFPlotsPreamble(" rgb=(0.18063,0.42997,0.55728)") 207 | pushPGFPlotsPreamble(" rgb=(0.18226,0.42618,0.55712)") 208 | pushPGFPlotsPreamble(" rgb=(0.18390,0.42238,0.55694)") 209 | pushPGFPlotsPreamble(" rgb=(0.18556,0.41857,0.55675)") 210 | pushPGFPlotsPreamble(" rgb=(0.18723,0.41475,0.55655)") 211 | pushPGFPlotsPreamble(" rgb=(0.18892,0.41091,0.55633)") 212 | pushPGFPlotsPreamble(" rgb=(0.19063,0.40706,0.55609)") 213 | pushPGFPlotsPreamble(" rgb=(0.19236,0.40320,0.55584)") 214 | pushPGFPlotsPreamble(" rgb=(0.19410,0.39932,0.55556)") 215 | pushPGFPlotsPreamble(" rgb=(0.19586,0.39543,0.55528)") 216 | pushPGFPlotsPreamble(" rgb=(0.19764,0.39153,0.55497)") 217 | pushPGFPlotsPreamble(" rgb=(0.19943,0.38761,0.55464)") 218 | pushPGFPlotsPreamble(" rgb=(0.20124,0.38367,0.55429)") 219 | pushPGFPlotsPreamble(" rgb=(0.20306,0.37972,0.55393)") 220 | pushPGFPlotsPreamble(" rgb=(0.20490,0.37575,0.55353)") 221 | pushPGFPlotsPreamble(" rgb=(0.20676,0.37176,0.55312)") 222 | pushPGFPlotsPreamble(" rgb=(0.20862,0.36775,0.55268)") 223 | pushPGFPlotsPreamble(" rgb=(0.21050,0.36373,0.55221)") 224 | pushPGFPlotsPreamble(" rgb=(0.21240,0.35968,0.55171)") 225 | pushPGFPlotsPreamble(" rgb=(0.21430,0.35562,0.55118)") 226 | pushPGFPlotsPreamble(" rgb=(0.21621,0.35153,0.55063)") 227 | pushPGFPlotsPreamble(" rgb=(0.21813,0.34743,0.55004)") 228 | pushPGFPlotsPreamble(" rgb=(0.22006,0.34331,0.54941)") 229 | pushPGFPlotsPreamble(" rgb=(0.22199,0.33916,0.54875)") 230 | pushPGFPlotsPreamble(" rgb=(0.22393,0.33499,0.54805)") 231 | pushPGFPlotsPreamble(" rgb=(0.22586,0.33081,0.54731)") 232 | pushPGFPlotsPreamble(" rgb=(0.22780,0.32659,0.54653)") 233 | pushPGFPlotsPreamble(" rgb=(0.22974,0.32236,0.54571)") 234 | pushPGFPlotsPreamble(" rgb=(0.23167,0.31811,0.54483)") 235 | pushPGFPlotsPreamble(" rgb=(0.23360,0.31383,0.54391)") 236 | pushPGFPlotsPreamble(" rgb=(0.23553,0.30953,0.54294)") 237 | pushPGFPlotsPreamble(" rgb=(0.23744,0.30520,0.54192)") 238 | pushPGFPlotsPreamble(" rgb=(0.23935,0.30085,0.54084)") 239 | pushPGFPlotsPreamble(" rgb=(0.24124,0.29648,0.53971)") 240 | pushPGFPlotsPreamble(" rgb=(0.24311,0.29209,0.53852)") 241 | pushPGFPlotsPreamble(" rgb=(0.24497,0.28768,0.53726)") 242 | pushPGFPlotsPreamble(" rgb=(0.24681,0.28324,0.53594)") 243 | pushPGFPlotsPreamble(" rgb=(0.24863,0.27877,0.53456)") 244 | pushPGFPlotsPreamble(" rgb=(0.25043,0.27429,0.53310)") 245 | pushPGFPlotsPreamble(" rgb=(0.25219,0.26978,0.53158)") 246 | pushPGFPlotsPreamble(" rgb=(0.25394,0.26525,0.52998)") 247 | pushPGFPlotsPreamble(" rgb=(0.25565,0.26070,0.52831)") 248 | pushPGFPlotsPreamble(" rgb=(0.25732,0.25613,0.52656)") 249 | pushPGFPlotsPreamble(" rgb=(0.25897,0.25154,0.52474)") 250 | pushPGFPlotsPreamble(" rgb=(0.26057,0.24692,0.52283)") 251 | pushPGFPlotsPreamble(" rgb=(0.26214,0.24229,0.52084)") 252 | pushPGFPlotsPreamble(" rgb=(0.26366,0.23763,0.51876)") 253 | pushPGFPlotsPreamble(" rgb=(0.26515,0.23296,0.51660)") 254 | pushPGFPlotsPreamble(" rgb=(0.26658,0.22826,0.51435)") 255 | pushPGFPlotsPreamble(" rgb=(0.26797,0.22355,0.51201)") 256 | pushPGFPlotsPreamble(" rgb=(0.26931,0.21882,0.50958)") 257 | pushPGFPlotsPreamble(" rgb=(0.27059,0.21407,0.50705)") 258 | pushPGFPlotsPreamble(" rgb=(0.27183,0.20930,0.50443)") 259 | pushPGFPlotsPreamble(" rgb=(0.27301,0.20452,0.50172)") 260 | pushPGFPlotsPreamble(" 
rgb=(0.27413,0.19972,0.49891)") 261 | pushPGFPlotsPreamble(" rgb=(0.27519,0.19490,0.49600)") 262 | pushPGFPlotsPreamble(" rgb=(0.27619,0.19007,0.49300)") 263 | pushPGFPlotsPreamble(" rgb=(0.27713,0.18523,0.48990)") 264 | pushPGFPlotsPreamble(" rgb=(0.27801,0.18037,0.48670)") 265 | pushPGFPlotsPreamble(" rgb=(0.27883,0.17549,0.48340)") 266 | pushPGFPlotsPreamble(" rgb=(0.27957,0.17060,0.48000)") 267 | pushPGFPlotsPreamble(" rgb=(0.28025,0.16569,0.47650)") 268 | pushPGFPlotsPreamble(" rgb=(0.28087,0.16077,0.47290)") 269 | pushPGFPlotsPreamble(" rgb=(0.28141,0.15583,0.46920)") 270 | pushPGFPlotsPreamble(" rgb=(0.28189,0.15088,0.46541)") 271 | pushPGFPlotsPreamble(" rgb=(0.28229,0.14591,0.46151)") 272 | pushPGFPlotsPreamble(" rgb=(0.28262,0.14093,0.45752)") 273 | pushPGFPlotsPreamble(" rgb=(0.28288,0.13592,0.45343)") 274 | pushPGFPlotsPreamble(" rgb=(0.28307,0.13090,0.44924)") 275 | pushPGFPlotsPreamble(" rgb=(0.28319,0.12585,0.44496)") 276 | pushPGFPlotsPreamble(" rgb=(0.28323,0.12078,0.44058)") 277 | pushPGFPlotsPreamble(" rgb=(0.28320,0.11568,0.43611)") 278 | pushPGFPlotsPreamble(" rgb=(0.28309,0.11055,0.43155)") 279 | pushPGFPlotsPreamble(" rgb=(0.28291,0.10539,0.42690)") 280 | pushPGFPlotsPreamble(" rgb=(0.28266,0.10020,0.42216)") 281 | pushPGFPlotsPreamble(" rgb=(0.28233,0.09495,0.41733)") 282 | pushPGFPlotsPreamble(" rgb=(0.28192,0.08967,0.41241)") 283 | pushPGFPlotsPreamble(" rgb=(0.28145,0.08432,0.40741)") 284 | pushPGFPlotsPreamble(" rgb=(0.28089,0.07891,0.40233)") 285 | pushPGFPlotsPreamble(" rgb=(0.28027,0.07342,0.39716)") 286 | pushPGFPlotsPreamble(" rgb=(0.27957,0.06784,0.39192)") 287 | pushPGFPlotsPreamble(" rgb=(0.27879,0.06214,0.38659)") 288 | pushPGFPlotsPreamble(" rgb=(0.27794,0.05632,0.38119)") 289 | pushPGFPlotsPreamble(" rgb=(0.27702,0.05034,0.37572)") 290 | pushPGFPlotsPreamble(" rgb=(0.27602,0.04417,0.37016)") 291 | pushPGFPlotsPreamble(" rgb=(0.27495,0.03775,0.36454)") 292 | pushPGFPlotsPreamble(" rgb=(0.27381,0.03150,0.35885)") 293 | pushPGFPlotsPreamble(" rgb=(0.27259,0.02556,0.35309)") 294 | pushPGFPlotsPreamble(" rgb=(0.27131,0.01994,0.34727)") 295 | pushPGFPlotsPreamble(" rgb=(0.26994,0.01463,0.34138)") 296 | pushPGFPlotsPreamble(" rgb=(0.26851,0.00961,0.33543)") 297 | pushPGFPlotsPreamble(" rgb=(0.26700,0.00487,0.32942)") 298 | pushPGFPlotsPreamble(" }") 299 | pushPGFPlotsPreamble("}"); 300 | --------------------------------------------------------------------------------