├── REQUIRE
├── .codecov.yml
├── .gitignore
├── src
    ├── AMVW.jl
    ├── utils.jl
    ├── diagnostics.jl
    ├── AMVW_algorithm.jl
    ├── transformations.jl
    ├── types.jl
    ├── factorization.jl
    └── bulge.jl
├── README.md
├── .travis.yml
├── test.jl
├── appveyor.yml
├── LICENSE.md
└── test
    ├── runtests.jl
    └── benchmark.jl


/REQUIRE:
--------------------------------------------------------------------------------
1 | julia 0.5
2 | 


--------------------------------------------------------------------------------
/.codecov.yml:
--------------------------------------------------------------------------------
1 | comment: false
2 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.jl.cov
2 | *.jl.*.cov
3 | *.jl.mem
4 | 


--------------------------------------------------------------------------------
/src/AMVW.jl:
--------------------------------------------------------------------------------
 1 | #__precompile__(true)
 2 | module AMVW
 3 | 
 4 | using Compat
 5 | # package code goes here
 6 | 
 7 | 
 8 | include("types.jl")
 9 | include("utils.jl")
10 | include("transformations.jl")
11 | include("bulge.jl")
12 | include("factorization.jl")
13 | include("diagnostics.jl")
14 | include("AMVW_algorithm.jl")
15 | 
16 | 
17 | end # module
18 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # AMVW
 2 | 
 3 | [![Build Status](https://travis-ci.org/jverzani/AMVW.jl.svg?branch=master)](https://travis-ci.org/jverzani/AMVW.jl)
 4 | 
 5 | [![Coverage Status](https://coveralls.io/repos/jverzani/AMVW.jl/badge.svg?branch=master&service=github)](https://coveralls.io/github/jverzani/AMVW.jl?branch=master)
 6 | 
 7 | [![codecov.io](http://codecov.io/github/jverzani/AMVW.jl/coverage.svg?branch=master)](http://codecov.io/github/jverzani/AMVW.jl?branch=master)
 8 | 
 9 | 
10 | 
11 | An implementation of the main algorithm in "Fast and backward stable computation of roots of polynomials" by Aurentz, Mach, Vandebril, and Watkins.
12 | 
13 | Follows the accompanying code, whose license is unclear.
14 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | # Documentation: http://docs.travis-ci.com/user/languages/julia/
 2 | language: julia
 3 | os:
 4 |   - linux
 5 |   - osx
 6 | julia:
 7 |   - release
 8 |   - nightly
 9 | notifications:
10 |   email: false
11 | # uncomment the following lines to override the default test script
12 | #script:
13 | #  - if [[ -a .git/shallow ]]; then git fetch --unshallow; fi
14 | #  - julia -e 'Pkg.clone(pwd()); Pkg.build("AMVW"); Pkg.test("AMVW"; coverage=true)'
15 | after_success:
16 |   # push coverage results to Coveralls
17 |   - julia -e 'cd(Pkg.dir("AMVW")); Pkg.add("Coverage"); using Coverage; Coveralls.submit(Coveralls.process_folder())'
18 |   # push coverage results to Codecov
19 |   - julia -e 'cd(Pkg.dir("AMVW")); Pkg.add("Coverage"); using Coverage; Codecov.submit(Codecov.process_folder())'
20 | 


--------------------------------------------------------------------------------
/test.jl:
--------------------------------------------------------------------------------
 1 | 
 2 | ## Drivers... (scratch timing/profiling script)
 3 | ## NOTE(review): this file never loads the package (no `using AMVW`) -- confirm how it is meant to be run
 4 | using Polynomials
 5 | ## p is a Poly with coefficients p0 + p1x + p2x^2 + p3x^3 + p4x^4 stored as [p0, p1, ..., p4]
 6 | damvw(p::Poly) = damvw(p.a)  # NOTE(review): no `damvw` taking a vector is visible in the sources shown -- confirm
 7 | 
 8 | function poly_roots(p::Poly)
 9 |     state = damvw(p.a)
10 |     AMVW(state)  # NOTE(review): `AMVW` is the module name in src/AMVW.jl; presumably a stale call -- confirm
11 |     poly_roots(state)
12 | end
13 | 
14 | ## first run: presumably serves to compile everything before timing (see "warmed up" below)
15 | using Base.Profile
16 | using Polynomials
17 | T = Float64
18 | #T = BigFloat
19 | x = variable(T)
20 | p = prod(x - i/10 for i in 1:20)  # roots 0.1, 0.2, ..., 2.0
21 | #p =  prod(x^2 + i for i in 1:5)
22 | #p = poly(linspace(.1,1,20))
23 | state = damvw(p)
24 | AMVW.AMVW(state)
25 | 
26 | 
27 | 
28 | # ## warmed up
29 | ## timed run on a small polynomial
30 | n = 5
31 | p = poly(linspace(.5,1,n))
32 | println(n)
33 | state = damvw(p)
34 | @time DAMVW.AMVW(state)  # NOTE(review): module `DAMVW` is not defined in the sources shown -- presumably an old name
35 | ## profiled run on a fresh state built from the same polynomial
36 | Profile.clear()
37 | p = poly(linspace(.5,1,n))
38 | state = damvw(p)
39 | @profile DAMVW.AMVW(state)
40 | Profile.print(format=:flat, sortedby=:count)
41 | 
42 | # n = 10
43 | # as = zeros(5n)
44 | # for i in 1:5n
45 | #     println("doing $i")
46 | #     p = poly(linspace(.2, 1.0, i+2))
47 | #     state = damvw(p)
48 | #     a = time(); DAMVW.AMVW(state); b = time() - a
49 | #     as[i] = b
50 | # end
51 | 


--------------------------------------------------------------------------------
/appveyor.yml:
--------------------------------------------------------------------------------
 1 | environment:
 2 |   matrix:
 3 |   - JULIAVERSION: "julialang/bin/winnt/x86/0.5/julia-0.5-latest-win32.exe"
 4 |   - JULIAVERSION: "julialang/bin/winnt/x64/0.5/julia-0.5-latest-win64.exe"
 5 |   - JULIAVERSION: "julianightlies/bin/winnt/x86/julia-latest-win32.exe"
 6 |   - JULIAVERSION: "julianightlies/bin/winnt/x64/julia-latest-win64.exe"
 7 | 
 8 | branches:
 9 |   only:
10 |     - master
11 |     - /release-.*/
12 | 
13 | notifications:
14 |   - provider: Email
15 |     on_build_success: false
16 |     on_build_failure: false
17 |     on_build_status_changed: false
18 | 
19 | install:
20 | # Download most recent Julia Windows binary
21 |   - ps: (new-object net.webclient).DownloadFile(
22 |         $("http://s3.amazonaws.com/"+$env:JULIAVERSION),
23 |         "C:\projects\julia-binary.exe")
24 | # Run installer silently, output to C:\projects\julia
25 |   - C:\projects\julia-binary.exe /S /D=C:\projects\julia
26 | 
27 | build_script:
28 | # Need to convert from shallow to complete for Pkg.clone to work
29 |   - IF EXIST .git\shallow (git fetch --unshallow)
30 |   - C:\projects\julia\bin\julia -e "versioninfo();
31 |       Pkg.clone(pwd(), \"AMVW\"); Pkg.build(\"AMVW\")"
32 | 
33 | test_script:
34 |   - C:\projects\julia\bin\julia -e "Pkg.test(\"AMVW\")"
35 | 


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
 1 | 
 2 | This is not the proper license, though it would be nice
 3 | 
 4 | ## The AMVW.jl package is licensed under the MIT "Expat" License:
 5 | 
 6 | > Copyright (c) 2017: XXX
 7 | > 
 8 | > Permission is hereby granted, free of charge, to any person obtaining a copy
 9 | > of this software and associated documentation files (the "Software"), to deal
10 | > in the Software without restriction, including without limitation the rights
11 | > to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 | > copies of the Software, and to permit persons to whom the Software is
13 | > furnished to do so, subject to the following conditions:
14 | > 
15 | > The above copyright notice and this permission notice shall be included in all
16 | > copies or substantial portions of the Software.
17 | > 
18 | > THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 | > IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 | > FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 | > AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 | > LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 | > OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24 | > SOFTWARE.
25 | > 
26 | 


--------------------------------------------------------------------------------
/src/utils.jl:
--------------------------------------------------------------------------------
  1 | 
  2 | ## Take the coefficients [p0, p1, ..., pn] of a polynomial and return `(qs, k)`:
  3 | ## `k` counts the roots at 0 that were trimmed and `qs = [q_m-1, ..., q0]` holds
  4 | ## the reversed, non-leading coefficients of the remaining polynomial made monic.
  5 | ## E.g. the monomial x^5 gives an empty `qs` and `k = 5`.
  6 | function reverse_poly{T}(ps::AbstractVector{T})
  7 |     ## any AbstractVector is accepted, matching the signature of `poly_roots`
  8 |     N = findlast(!iszero, ps)   # last nonzero entry: the true leading coefficient
  9 |     K = findfirst(!iszero, ps)  # first nonzero entry: K-1 zero roots precede it
 10 | 
 11 |     ## every coefficient is 0: nothing is left, report length(ps) trimmed entries
 12 |     N == 0 && return(zeros(T,0), length(ps))
 13 |     ps = ps[K:N]
 14 | 
 15 |     qs = reverse(ps./ps[end])[2:end]  # make monic, reverse, drop the leading 1
 16 |     qs, K-1
 17 | end
 18 | 
 19 | ## roots of a*x^2 + b*x + c as (re1, im1, re2, im2); qdrtc solves a*x^2 - 2b'*x + c
 20 | function quadratic_equation{T <: Real}(a::T, b::T, c::T)
 21 |     qdrtc(promote(a, -b/2, c)...)  # -b/2 keeps Float32/BigFloat; promote covers Int
 22 | end
 23 | 
 24 | ## Complex a*x^2 + b*x + c by the textbook formula (XXX make more robust)
 25 | function quadratic_equation{T}(a::Complex{T}, b::Complex{T}, c::Complex{T})
 26 |     root_disc = sqrt(b^2 - 4*a*c)
 27 |     plus = (-b + root_disc)/(2a)
 28 |     minus = (-b-root_disc)/(2a)
 29 |     return (real(plus), imag(plus), real(minus), imag(minus))
 30 | end
 31 | 
 32 | ## Kahan's quadratic solver, built on the compensated discriminant `discr`
 33 | ##  https://people.eecs.berkeley.edu/~wkahan/Qdrtcs.pdf
 34 | ##
 35 | ## Solves a*x^2 - 2b*x + c = 0; the roots come back as (re1, im1, re2, im2).
 36 | function qdrtc{T <: Real}(a::T, b::T, c::T)
 37 |     disc = discr(a,b,c)  # b^2 - a*c: the 2 in front of b absorbs the usual 4
 38 | 
 39 |     if disc <= 0
 40 |         ## complex conjugate pair (a double real root when disc == 0)
 41 |         re_part, im_part = b/a, sqrt(-disc)/a
 42 |         return (re_part, im_part, re_part, -im_part)
 43 |     end
 44 | 
 45 |     ## two real roots: sqrt(disc) is given the sign of b (+1 when b == 0)
 46 |     w = sqrt(disc) * (sign(b) + iszero(b)) + b
 47 |     return (w/a, zero(T), c/w, zero(T))
 48 | end
 49 | 
 50 | ## b^2 - a*c, recomputed with exact product errors when cancellation is severe
 51 | function discr{T}(a::T,b::T,c::T)
 52 |     pie = 3.0 # depends on 53 or 64 bit...
 53 |     d = b*b - a*c
 54 |     e = b*b + a*c
 55 |     pie*abs(d) > e && return d   # little cancellation: the plain value is fine
 56 | 
 57 |     ## `fma` rounds once, so dp and dq are the exact rounding errors of p and
 58 |     ## q; `muladd` may round twice and would then yield 0 for both
 59 |     p = b*b
 60 |     dp = fma(b,b,-p)
 61 |     q = a*c
 62 |     dq = fma(a,c,-q)
 63 |     (p-q) + (dp - dq)
 64 | end
 65 | 
 66 | ##
 67 | ## Solve the degree 2 or less case directly, storing the roots in
 68 | ## state.REIGS / state.IEIGS. COMPLEX VALUES XXX
 69 | function solve_simple_cases(state)
 70 |     N = state.N  # the degree; the bare `N` read here before was never defined
 71 |     if N == 0
 72 |         state.FLAG = -1
 73 |         return
 74 |     elseif N == 1
 75 |         state.FLAG = 0
 76 |         state.REIGS[1] = -state.POLY[1]
 77 |         return
 78 |     elseif N == 2
 79 |         # quadratic formula for x^2 + b*x + c with c = POLY[1], b = POLY[2]
 80 |         c,b = state.POLY[1], state.POLY[2]
 81 |         tr = -b
 82 |         disc = b^2 - 4.0*c
 83 | 
 84 |         if disc < 0
 85 |             # complex conjugate pair
 86 |             state.REIGS[1] = -b/2.0
 87 |             state.IEIGS[1] = sqrt(-disc)/2.0
 88 |             state.REIGS[2] = state.REIGS[1]
 89 |             state.IEIGS[2] = -state.IEIGS[1]
 90 |         else
 91 |             u,v = tr + sqrt(disc), tr - sqrt(disc)
 92 |             # keep the larger-magnitude value in u; the second root then comes
 93 |             # from the product of the roots (c) rather than from a subtraction
 94 |             if abs(u) < abs(v)
 95 |                 u,v = v, u
 96 |             end
 97 |             if u != 0   # u == 0: nothing is written
 98 |                 state.REIGS[1] = u/2.0
 99 |                 state.REIGS[2] = c/state.REIGS[1]
100 |             end
101 |         end
102 |     end
103 | end
104 | 
105 | 


--------------------------------------------------------------------------------
/src/diagnostics.jl:
--------------------------------------------------------------------------------
  1 | ## Diagonostic code
  2 | ##
  3 | 
  4 | ## Embed a rotator in the N x N identity: the block [c -conj(s); s conj(c)]
  5 | ## is placed at rows/columns idx(a), idx(a)+1. Errors unless idx(a) < N.
  6 | function as_full{T}(a::Rotator{T}, N::Int)
  7 |     k = idx(a)
  8 |     k < N || error("too big")
  9 |     cosine, sine = vals(a)
 10 |     M = eye(Complex{T}, N)
 11 |     M[k:k+1, k:k+1] = [cosine -conj(sine); sine conj(cosine)]
 12 |     return M
 13 | end
 14 | 
 15 | ## Set every entry of A whose norm is at most `tol` to zero, in place
 16 | function zero_out!{T}(A::Array{T}, tol=1e-12)
 17 |     tiny = norm.(A) .<= tol
 18 |     A[tiny] = zero(T)
 19 | end
 20 | function zero_out!{T}(A::Array{Complex{T}}, tol=1e-12)
 21 |     ## Complex arrays are cleaned part by part: a real or imaginary part
 22 |     ## strictly below `tol` in absolute value is replaced by an exact zero.
 23 |     clip(x) = abs(x) < tol ? zero(T) : x
 24 | 
 25 |     for k in eachindex(A)
 26 |         z = A[k]
 27 |         re_part = clip(real(z))
 28 |         im_part = clip(imag(z))
 29 |         A[k] = complex(re_part, im_part)
 30 |     end
 31 |     return nothing
 32 | end
 33 | 
 34 | ## diagnostic
 35 | 
 36 | ## create Full matrix from state object. For diagnostic purposes.
 37 | # we may or may not have a diagonal matrix to keep track of
 38 | D_matrix{T}(state::ComplexRealSingleShift{T}) = diagm(state.D)  # state.D placed on the diagonal
 39 | D_matrix(state::ShiftType) = I  # any other shift type: the identity `I`
 40 | 
 41 | #function Base.full{T}(state::ComplexRealSingleShift{T}, what=:A)
 42 | function Base.full{T}(state::ShiftType{T}, what=:A)  # dense matrix rebuilt from the rotators: R when what == :R, otherwise A = Q*R
 43 |     N = state.N
 44 |     Q = as_full(state.Q[1],N+1); for i in 2:N Q = Q * as_full(state.Q[i],N+1) end  # Q[1]*Q[2]*...*Q[N]
 45 |     Ct = as_full(state.Ct[1], N+1); for i in 2:N Ct =  as_full(state.Ct[i],N+1)*Ct end  # Ct[N]*...*Ct[1]: reversed order
 46 |     B = as_full(state.B[1],N+1); for i in 2:N B = B * as_full(state.B[i],N+1) end  # B[1]*B[2]*...*B[N]
 47 |     D = D_matrix(state)
 48 |     ## NOTE(review): `alpha` and `en` below are never read, and this first `x`
 49 |     ## is overwritten before it is used -- presumably leftovers; confirm
 50 |     #    x = -vcat(state.POLY[2:state.N], state.POLY[1], 1)
 51 |     par = iseven(state.N) ? one(T) : -one(T)
 52 |     x = -vcat(state.POLY[state.N-1:-1:1], -par * state.POLY[state.N],  par * 1)
 53 |     alpha = norm(x)
 54 |     e1 = zeros(T, state.N+1); e1[1]=one(T)  # unit vectors of length N+1
 55 |     en = zeros(T, state.N+1); en[N] = one(T)
 56 |     en1 = zeros(T, state.N+1); en1[N+1] = one(T)
 57 | 
 58 |     rho = transpose(en1) * Ct * e1  # scalar 
 59 |     yt = -1/rho * transpose(en1)  * Ct * B
 60 |     # clean: entries of yt with norm below 1e-12 are set to 0
 61 |     for i in eachindex(yt)
 62 |         if norm(yt[i]) < 1e-12
 63 |             yt[i] = 0
 64 |         end
 65 |     end
 66 |     
 67 |     ## we have R = Z + x = Ct * (B * D + e1 * yt)
 68 |     Z = Ct * B 
 69 |     zero_out!(Z)
 70 |     
 71 |     x = Ct * e1 * yt 
 72 |     
 73 |     R = D*(Z + x)
 74 |     zero_out!(R)
 75 |     what == :R && return R  # caller asked for the R factor only
 76 |     
 77 |     A = Q * R
 78 |     zero_out!(A)
 79 |     A
 80 | end
 81 | 
 82 | 
 83 | # one-line picture of the iteration; α, x, Δ are placed from zero/start/stop indices
 84 | function show_status{T}(state::ShiftType{T})
 85 |     ctrs = state.ctrs
 86 |     sines = [norm(u.s) for u in state.Q[ctrs.start_index:ctrs.stop_index]]
 87 |     minq = isempty(sines) ? 0.0 : minimum(sines)  # smallest |s| in start:stop
 88 | 
 89 |     marks = fill(".", state.N+2)
 90 |     marks[ctrs.zero_index+1] = "α"
 91 |     marks[ctrs.start_index+1] = "x"
 92 |     marks[ctrs.stop_index+2] = "Δ"
 93 |     println(join(marks, ""), " ($minq)")
 94 | end
 95 | 
 96 | ## N x N identity (eltype T) with [a -conj(b); b conj(a)] at rows/columns i, i+1
 97 | function rotm{T}(a::T,b, i, N)
 98 |     rows = i:i+1
 99 |     M = eye(T, N); M[rows, rows] = [a -conj(b); b conj(a)]
100 |     return M
101 | end
102 | 
103 | 


--------------------------------------------------------------------------------
/src/AMVW_algorithm.jl:
--------------------------------------------------------------------------------
  1 | 
  2 | ## Main algorithm of AMV&W
  3 | ## This follows that given in the paper very closely
  4 | function AMVW_algorithm{T}(state::ShiftType{T})
  5 |     ## Loops until state.ctrs.stop_index drops to 0 or the iteration cap is
  6 |     ## passed; roots are written into state.REIGS / state.IEIGS along the way.
  7 |     it_max = 60 * state.N  # iteration cap, proportional to state.N
  8 |     kk = 0
  9 | 
 10 |     while kk <= it_max
 11 | 
 12 |         ## finished up!
 13 |         state.ctrs.stop_index <= 0 && return
 14 |         
 15 |         check_deflation(state)
 16 |         kk += 1
 17 | 
 18 | ##        show_status(state)
 19 | 
 20 |         k = state.ctrs.stop_index
 21 | 
 22 |         if state.ctrs.stop_index - state.ctrs.zero_index >= 2
 23 |             ## gap between zero_index and stop_index of at least 2: one more bulge step
 24 |             bulge_step(state)
 25 |             state.ctrs.it_count += 1
 26 |             state.ctrs.tr -= 2
 27 |             
 28 |         elseif state.ctrs.stop_index - state.ctrs.zero_index == 1
 29 |             ## gap of 1: two eigenvalues are read off (presumably from a 2x2 block -- confirm)
 30 |             diagonal_block(state,  k + 1)
 31 |             eigen_values(state)
 32 | 
 33 |             ## state.e2 goes to slot k, state.e1 to slot k+1
 34 |             state.REIGS[k], state.IEIGS[k] = state.e2
 35 |             state.REIGS[k+1], state.IEIGS[k+1] = state.e1
 36 | 
 37 |             if k > 1  # NOTE(review): the results of this pair of calls are not read in this function -- confirm intent
 38 |                 diagonal_block(state,  k)
 39 |                 eigen_values(state)
 40 |             end
 41 |             
 42 |             diagonal_block(state, 2)  # NOTE(review): repeated inside the `if` just below -- presumably redundant; confirm
 43 |             
 44 |             if state.ctrs.stop_index == 2
 45 |                 diagonal_block(state, 2)
 46 |                 e1 = state.A[1,1]
 47 |                 state.REIGS[1] = real(e1)
 48 |                 state.IEIGS[1] = imag(e1)
 49 |             end
 50 |             state.ctrs.zero_index = 0
 51 |             state.ctrs.start_index = 1
 52 |             state.ctrs.stop_index = state.ctrs.stop_index - 2  # two roots were stored: shrink by 2
 53 |             
 54 |         elseif state.ctrs.stop_index - state.ctrs.zero_index == 0
 55 |             ## gap of 0: roots are taken from the diagonal entries of state.A
 56 |             diagonal_block(state, state.ctrs.stop_index + 1)
 57 |             e1, e2 = state.A[1,1], state.A[2,2] 
 58 |                         
 59 |             if state.ctrs.stop_index == 1
 60 |                 state.REIGS[1], state.IEIGS[1] = real(e1), imag(e1)
 61 |                 state.REIGS[2], state.IEIGS[2] = real(e2), imag(e2)
 62 |                 state.ctrs.stop_index = 0
 63 |             else
 64 |                 state.REIGS[k+1], state.IEIGS[k+1] = real(e2), imag(e2)
 65 |                 state.ctrs.zero_index = 0
 66 |                 state.ctrs.start_index = 1
 67 |                 state.ctrs.stop_index = k - 1  # one root was stored: shrink by 1
 68 |             end
 69 |         end
 70 |     end
 71 |     ## reached only when the iteration cap was exceeded before stop_index hit 0
 72 |     warn("Not all roots were found. The first $(state.ctrs.stop_index-1) are missing.")
 73 | end
 74 | 
 75 | ## Real coefficients: build a RealDoubleShift state from `qs`, initialise it,
 76 | ## run the algorithm on it and return that state.
 77 | ## qs is [p_{n-1}, p_{n-2}, ..., p_1, p_0] for the
 78 | ## monic poly x^n + p_{n-1}x^{n-1} + ... + p_1 x + p_0
 79 | function amvw{T <: Real}(qs::Vector{T})
 80 |     st = RealDoubleShift(qs)
 81 |     init_state(st); AMVW_algorithm(st)
 82 |     return st
 83 | end
 84 | 
 85 | function amvw{T <: Real}(qs::Vector{Complex{T}})
 86 |     ## complex coefficients use a ComplexRealSingleShift state (the
 87 |     ## ComplexSingleShift variant is currently disabled)
 88 |     st = ComplexRealSingleShift(qs)
 89 |     init_state(st); AMVW_algorithm(st)
 90 |     return st
 91 | end
 92 | 
 93 | """
 94 |     poly_roots(ps)
 95 | 
 96 | Find the roots of the polynomial p_0 + p_1 x + p_2 x^2 + ... + p_n x^n encoded
 97 | as `[p_0, p_1, ..., p_n]` (the same ordering used by `Polynomials`).
 98 | 
 99 | Degrees 1 and 2 are solved directly; higher degrees use the AMVW algorithm
100 | (`amvw`). Returns a complex vector of roots, with the roots at 0 listed last.
101 | 
102 | Example: API needs work!
103 | ```
104 | using Polynomials
105 | x = variable()
106 | p = prod(x - i/10 for i in 5:10)
107 | AMVW.poly_roots(p.a)
108 | ```
109 | """
110 | function poly_roots{T}(ps::AbstractVector{T})
111 |     qs, k = reverse_poly(ps)  # qs: monic, reversed coefficients; k: roots at 0
112 | 
113 |     ## reverse_poly divides by the leading coefficient, so the element type of
114 |     ## qs can be wider than T (e.g. Int -> Float64); dispatch on it, not on T
115 |     S = eltype(qs)
116 |     n = length(qs)
117 |     if n == 0
118 |         rts = complex.(zeros(k), zeros(k))
119 |     elseif n == 1
120 |         as = vcat([-real(qs[1])], zeros(k))
121 |         bs = vcat([-imag(qs[1])], zeros(k))
122 |         rts = complex.(as, bs)
123 |     elseif n == 2
124 |         if S <: Real
125 |             ## qdrtc solves x^2 - 2b*x + c, hence b = -qs[1]/2
126 |             e1r, e1i, e2r, e2i = qdrtc(one(S), -qs[1]/2, qs[2])
127 |         else
128 |             e1r, e1i, e2r, e2i = quadratic_equation(one(S), qs[1], qs[2])
129 |         end
130 |         as = vcat([e1r, e2r], zeros(k))
131 |         bs = vcat([e1i, e2i], zeros(k))
132 |         rts = complex.(as, bs)
133 |     else
134 |         state = amvw(qs)
135 |         as = vcat(state.REIGS, zeros(k))
136 |         bs = vcat(state.IEIGS, zeros(k))
137 |         rts = complex.(as, bs)
138 |     end
139 |     return rts
140 | end
140 | 


--------------------------------------------------------------------------------
/src/transformations.jl:
--------------------------------------------------------------------------------
  1 | ##
  2 | ##################################################
  3 | 
  4 | """
  5 |     givensrot(a, b)
  6 | 
  7 |  Real Givens rotation. Computes `(c, s, r)` such that
  8 | 
  9 |  [c -s; s c] * [a, b] = [r, 0]; c^2 + s^2 = 1
 10 | 
 11 |  and r = sqrt(|a|^2 + |b|^2). For a = b = 0 the identity (c = 1, s = 0) is
 12 |  returned, so the result is always a rotation.
 13 | 
 14 |   XXX seems faster to just return r, then not
 15 | """
 16 | function givensrot{T <: Real}(a::T,b::T)
 17 |     # b == 0: only a sign is needed; `sign(a)` would give c = 0 for a == 0
 18 |     iszero(b) && return (a < 0 ? -one(T) : one(T), zero(T), abs(a))
 19 |     iszero(a) && return(zero(T), -sign(b) * one(T), abs(b))
 20 | 
 21 |     r = hypot(a,b)
 22 |     return(a/r,-b/r,r)
 23 | end
 24 | 
 25 | ## Complex Givens rotation
 26 | ##################################################
 27 | # Compute a Givens rotation zeroing b:
 28 | #
 29 | # G1 [ a ] = [ nrm ]
 30 | # G1 [ b ]   [  0  ]
 31 | #
 32 | # Returns (u, v, nrm) = (G.s, -G.c, r), taken from `givens(b, a, 1, 2)`.
 33 | # The package tests check that the second entry of
 34 | # [u -conj(v); v conj(u)] * [a, b] vanishes.
 35 | #
 36 | # XXX: Could hand write this if needed, here we just use `givens` with a flip
 37 | # to get what we want: a real sine part (NOTE(review): relies on `givens` returning a real `c` -- confirm)
 38 | function givensrot{T <: Real}(a::Complex{T},b::Complex{T})
 39 |     G, r = givens(b, a, 1, 2)
 40 |     G.s, -G.c, r
 41 | end
 42 | givensrot{T <: Real}(a::Complex{T},b::T) = givensrot(a, complex(b, zero(T)))  # real b: made complex, then the method above
 43 | 
 44 | 
 45 | ####   Operations on [,[ terms
 46 | 
 47 | ## zero_index and stop_index+1 point at "D" matrices.
 48 | ##
 49 | ## With D one of [1 0; 0 1] or [-1 0; 0 1] (so D^2 = I) we have the move
 50 | ## D    --->   D  (we update the rotator)
 51 | ##   [       [
 52 | ##
 53 | ## `dflip` performs it: the sine of `a` is multiplied by sign(d)
 54 | function dflip{T}(a::RealRotator{T}, d=one(T))
 55 |     a.s *= sign(d)
 56 | end
 57 | 
 58 | # d of a diagonal rotator RR(1,0) or RR(-1,0): the sign of its cosine
 59 | function getd{T}(a::RealRotator{T})
 60 |     cosine, sine = vals(a)
 61 |     is_diag = norm(sine) <= 4eps(T)  # the sine must vanish up to roundoff
 62 |     is_diag ? sign(cosine) : error("a is not a diagonal rotator")
 63 | end
 64 | 
 65 | ## This is main case
 66 | #  Q           D Q
 67 | #     D --> D 
 68 |     
 69 | 
 70 | """
 71 |    D  --> D
 72 | U           V
 73 | 
 74 | Pass `r` (U) through the diagonal rotator `d` (D). `d` is left untouched;
 75 | only the sine of `r` changes, picking up the factor `conj(d.c)`.
 76 | """
 77 | function Dflip{T}(r::ComplexComplexRotator{T}, d::ComplexComplexRotator{T})
 78 |     is_diagonal(d) || error("d must be diagonal rotator")
 79 |     r.s *= conj(d.c)
 80 | end
 81 | 
 82 | ##   U --> U Da
 83 | ## D           Da
 84 | ## (not the reverse!)
 85 | ## Only `r` is modified: its cosine is multiplied by conj(d.c). That `d` is
 86 | ## diagonal is assumed here, not checked.
 87 | function Dflip{T}(d::ComplexRealRotator{T}, r::ComplexRealRotator{T})
 88 |     cosine, sine = vals(r)
 89 |     phase = conj(d.c)
 90 |     vals!(r, cosine*phase, sine)
 91 | end
 92 | 
 93 | ## Used for the left fuse and for deflation
 94 | #
 95 | #  Di                         Di                     
 96 | #    Qi+1             Si+1     Di+1       Si+1   
 97 | #        Qi+2    -->    Si+2    Di+2    =    Si+2  * diagm([alpha, I, conj(alpha)])
 98 | #           ...           ...     ...          ...
 99 | #             Qj            Sj      Dj          Sj
100 | function cascade(Qs, D, alpha, i, j)
101 |     abar = conj(alpha)
102 |     for m in (i+1):j   # each Q = CR(c,s) becomes S = CR(c*conj(alpha), s)
103 |         cosine, sine = vals(Qs[m])
104 |         vals!(Qs[m], cosine*abar, sine)
105 |     end
106 |     D[i] *= alpha      # the phase lands in the diagonal at i and at j+1
107 |     D[j+1] *= abar
108 | end
109 | 
110 | 
111 | 
112 | 
113 | ## Fuse
114 | ## fuse combines two rotations, a and b, into one,
115 | 
116 | 
117 | ## For ComplexRealRotator the product a*b generally has a complex sine, so a
118 | ## phase is split off: the fused rotator keeps a real sine and the function
119 | ## returns the phase `d` with a*b = fused * [d 0; 0 conj(d)].
120 | ## Left fuse: the result overwrites `a`; `b` is not modified.
121 | function fuse{T}(a::ComplexRealRotator{T}, b::ComplexRealRotator{T},::Type{Val{:left}})
122 |     ## equal indices are assumed; no check is made
123 |     cosine = a.c * b.c - conj(a.s) * b.s
124 |     sine = conj(a.c) * b.s + a.s * b.c
125 |     modulus = norm(sine)
126 | 
127 |     phase = conj(sine)/modulus
128 |     a.c = cosine * phase
129 |     a.s = modulus
130 | 
131 |     return conj(phase)
132 | end
133 | 
134 | # Right fuse, uv -> (v') Di: the fused rotator overwrites `b`, `a` is kept,
135 | # and the split-off phase is returned
136 | function fuse{T}(a::ComplexRealRotator{T}, b::ComplexRealRotator{T}, ::Type{Val{:right}})
137 |     ## equal indices are assumed; no check is made
138 |     cosine = a.c * b.c - conj(a.s) * b.s
139 |     sine = conj(a.c) * b.s + a.s * b.c
140 |     modulus = norm(sine)
141 | 
142 |     phase = conj(sine)/modulus
143 |     b.c = cosine * phase
144 |     b.s = modulus
145 | 
146 |     return conj(phase)
147 | end
148 | 
149 | 
150 | ## Fuse for general rotators
151 | ## We have two functions as it seems a bit faster
152 | fuse{T}(a::Rotator{T}, b::Rotator{T}, dir, d) = fuse(a,b,dir)  # 4-argument form: `d` is ignored
153 | 
154 | function fuse{T}(a::Rotator{T}, b::Rotator{T},::Type{Val{:left}})
155 |     ## the product a*b overwrites `a`; no phase is split off, hence the
156 |     ## returned one(T). Equal indices are assumed, not checked.
157 |     cosine = a.c * b.c - conj(a.s) * b.s
158 |     sine = conj(a.c) * b.s + a.s * b.c
159 |     a.c, a.s = cosine, sine
160 |     return one(T)
161 | end
162 | function fuse{T}(a::Rotator{T}, b::Rotator{T}, ::Type{Val{:right}})
163 |     ## the product a*b overwrites `b`; no phase is split off, hence the
164 |     ## returned one(T). Equal indices are assumed, not checked.
165 |     cosine = a.c * b.c - conj(a.s) * b.s
166 |     sine = conj(a.c) * b.s + a.s * b.c
167 |     b.c, b.s = cosine, sine
168 |     return one(T)
169 | end
170 | 
171 | 
172 | # Turnover: Q1    Q3   | x x x |      Q1
173 | #              Q2    = | x x x | = Q3    Q2  <-- misfit=3 Q1, Q2 shift; 
174 | #                      | x x x |
175 | #
176 | # misfit is Val{:right} for <-- (right to left turnover), Val{:left} for -->
177 | #
178 | # This is the key computation once matrices are written as rotators.
179 | # It was written for complex rotators, where the sine part may be complex,
180 | # so it makes use of alpha and beta, which are not otherwise needed;
181 | # could streamline, but doesn't seem to incur an expense
182 | # Returns (c4, s4, c5, s5, c6, s6); the `turnover` methods store these pairs, conjugated/negated, back into the rotators.
183 | function _turnover{T}(Q1::Rotator{T}, Q2::Rotator{T}, Q3::Rotator{T})    
184 | #    i,j,k = idx(Q1), idx(Q2), idx(Q3)
185 | #    (i == k) || error("Need to have a turnover up down up or down up down: have i=$#i, j=$j, k=$k")
186 | #    abs(j-i) == 1 || error("Need to have |i-j| == 1")
187 |     
188 |     c1,s1 = vals(Q1)
189 |     c2,s2 = vals(Q2)
190 |     c3,s3 = vals(Q3)
191 | 
192 |     # key is to find U1,U2,U3 with U2'*U1'*U3' * (Q1*Q2*Q3) = I
193 |     # do so by three Givens rotations to make (Q1*Q2*Q3) upper triangular
194 |     
195 |     # initialize c4 and s4 (a, b: presumably first-column entries of Q1*Q2*Q3 -- TODO confirm)
196 |     a = conj(c1)*c2*s3 + s1*c3 
197 |     b = s2*s3
198 |     # check norm([a,b]) \approx 1    
199 |     c4, s4, temp = givensrot(a,b)#, Val{true})
200 | 
201 |     # initialize c5 and s5; `temp`, the norm from the first rotation, is reused as b
202 | 
203 |     a = c1*c3 - conj(s1)*c2*s3
204 |     b = temp
205 |     # check norm([a,b]) \approx 1
206 |     c5, s5, alpha = givensrot(a, b)
207 | 
208 |     alpha = alpha/norm(alpha)  # keep only the phase of the returned norm
209 |     c5 *= conj(alpha) # make diagonal elements 1
210 |     s5 *= alpha
211 |     
212 |     # second column
213 |     u = -c1*conj(s3) - conj(s1)*c2*conj(c3)
214 |     v = conj(c1)*c2*conj(c3) - s1*conj(s3)
215 |     w = s2 * conj(c3)
216 | 
217 |     a = c4*conj(c5)*v - conj(s4)*conj(c5)*w + s5*u
218 |     b = conj(c4)*w + s4*v
219 | 
220 |     c6, s6, beta = givensrot(a,b)
221 | 
222 |     beta = beta/norm(beta)  # again only the phase is kept
223 |     c6 *= conj(beta) # make diagonal elements 1
224 |     s6 *= beta
225 |     
226 |     (c4, s4, c5, s5, c6, s6)
227 | end
228 | 
229 | 
230 | 
231 | 
232 | function turnover{T}(Q1::Rotator{T}, Q2::Rotator{T}, Q3::Rotator{T},
233 |                      ::Type{Val{:right}})
234 |     ## each (c, s) pair from _turnover is stored as (conj(c), -s)
235 |     (ca, sa, cb, sb, cc, sc) = _turnover(Q1,Q2,Q3)
236 |     vals!(Q3, conj(ca), -sa)
237 |     vals!(Q1, conj(cb), -sb)
238 |     vals!(Q2, conj(cc), -sc)
239 |     idx!(Q3, idx(Q2))  # Q3, the right one, is the misfit: it takes Q2's index
240 | end
241 | 
242 | turnover{T}(Q1::Rotator{T}, Q2::Rotator{T}, Q3::Rotator{T}) = turnover(Q1, Q2, Q3, Val{:right})  # default direction: Val{:right}
243 | 
244 | function turnover{T}(Q1::Rotator{T}, Q2::Rotator{T}, Q3::Rotator{T},
245 |                      ::Type{Val{:left}})
246 |     ## each (c, s) pair from _turnover is stored as (conj(c), -s); here the
247 |     ## pairs go to Q2, Q3, Q1 in that order
248 |     (ca, sa, cb, sb, cc, sc) = _turnover(Q1,Q2,Q3)
249 |     vals!(Q2, conj(ca), -sa)
250 |     vals!(Q3, conj(cb), -sb)
251 |     vals!(Q1, conj(cc), -sc)
252 |     idx!(Q1, idx(Q2))   # Q1, the left one, is the misfit: it takes Q2's index
253 | end
254 | 
255 | 
256 | 
257 | 
258 | 
259 | ## passthrough
260 | ## Move a rotator through a diagonal matrix of phase shifts:
261 | ## D U -> U' D'
262 | ## D holds just the two affected entries, so D[1] plays the role of D[i];
263 | ## usually call with view(state.d, idx(U):idx(U)+1)
264 | function passthrough{T}(D, U::ComplexRealRotator{T})
265 |     d1, d2 = D[1], D[2]
266 |     cosine, sine = vals(U)
267 | 
268 |     ## only the cosine picks up a phase; the sine is passed on unchanged
269 |     vals!(U, cosine * d1 * conj(d2), sine)
270 | 
271 |     D[1], D[2] = d2, d1   # the two diagonal entries trade places
272 | end
273 | 
274 | function passthrough{T}(D::ComplexRealRotator{T}, U::ComplexRealRotator{T})
275 |     norm(D.s) <= 1e2*eps(T) || error("D not diagonal")
276 |     phase, dsine = vals(D)
277 |     cosine, sine = vals(U)
278 |     vals!(U, cosine*phase*phase, sine)  # U's cosine picks up the phase twice
279 |     vals!(D, conj(phase), dsine)        # D itself is conjugated
280 | end
281 | 


--------------------------------------------------------------------------------
/test/runtests.jl:
--------------------------------------------------------------------------------
  1 | using AMVW
  2 | const A = AMVW
  3 | using Base.Test
  4 | using Polynomials
  5 | 
  6 | # transformations
  7 | 
  8 | # givens rotation: applying [c -conj(s); s conj(c)] to [a, b] must zero the second entry
  9 | @testset "Givens rotations" begin
 10 |     a,b = complex(1.0, 2.0), complex(2.0, 3.0)  # fixed complex pair
 11 |     c,s,r = A.givensrot(a,b)
 12 |     @test norm(([c -conj(s); s conj(c)] * [a,b])[2]) <= 4eps(Float64)
 13 |     a,b = complex(rand(2)...), complex(rand(2)...)  # NOTE(review): no seed is set here, so a failure may not reproduce
 14 |     c,s,r = A.givensrot(a,b)
 15 |     @test norm(([c -conj(s); s conj(c)] * [a,b])[2]) <= 4eps(Float64)
 16 | end
 17 | 
 18 | 
 19 | 
 20 | 
 21 | # dflip: each case compares the dense product before the move with the one after it
 22 | @testset "D flip" begin
 23 |     t1 = pi/3;
 24 |     alpha = complex(cos(t1), sin(t1))  # unit-modulus phase
 25 |     d = AMVW.ComplexComplexRotator(alpha, complex(0.0, 0.0), 1)  # diagonal rotator: zero sine
 26 |     u = one(AMVW.ComplexComplexRotator{Float64})
 27 |     AMVW.vals!(u, complex(1.0, 2.0), complex(2.0, 3.0)); AMVW.idx!(u, 2)
 28 |     M = A.as_full(u, 3) * A.as_full(d,3)
 29 |     A.Dflip(u, d)
 30 |     M1 = A.as_full(d, 3) * A.as_full(u,3)
 31 |     u = M - M1  # `u` is reused for the difference
 32 |     @test  maximum(norm.(u)) <= 4eps()
 33 |     
 34 |     
 35 |     
 36 |     # complexrealrotator is different
 37 |     #  U --> U D
 38 |     # D         D
 39 |     r1,r2 = ones(AMVW.ComplexRealRotator{Float64},2)  # r2 is not used below
 40 |     AMVW.vals!(r1, complex(1.0, 2.0), 2.0); AMVW.idx!(r1, 1)
 41 |     di = one(AMVW.ComplexRealRotator{Float64})
 42 |     AMVW.vals!(di, complex(cos(pi/3), sin(pi/3)), 0.0); AMVW.idx!(di, 2)
 43 |     M = A.as_full(di, 3) * A.as_full(r1, 3)
 44 |     AMVW.Dflip(di, r1)
 45 |     dic = copy(di); AMVW.idx!(dic, AMVW.idx(r1))  # copy of di moved to r1's index
 46 |     M1 = AMVW.as_full(r1, 3) * AMVW.as_full(dic, 3) * AMVW.as_full(di, 3)
 47 |     @test maximum(norm.(M - M1)) <= 4eps()
 48 |     
 49 |     ## D   -->      D
 50 |     ##   U      U D
 51 |     ##
 52 |     r1,r2 = ones(AMVW.ComplexRealRotator{Float64},2)
 53 |     AMVW.vals!(r1, complex(1.0, 2.0), 2.0); AMVW.idx!(r1, 2)
 54 |     di = one(AMVW.ComplexRealRotator{Float64})
 55 |     AMVW.vals!(di, complex(cos(pi/3), sin(pi/3)), 0.0); AMVW.idx!(di, 1)
 56 |     M = A.as_full(di, 3) * A.as_full(r1, 3)
 57 |     AMVW.Dflip(di, r1)
 58 |     dic = copy(di); AMVW.idx!(dic, AMVW.idx(r1))
 59 |     M1 = AMVW.as_full(r1, 3) * AMVW.as_full(dic, 3) * AMVW.as_full(di, 3)
 60 |     @test maximum(norm.(M - M1)) <= 4eps()
 61 |     
 62 |     ##
 63 |     ## Q   --> D   Q
 64 |     ##   D       D
 65 |     r1,r2 = ones(AMVW.ComplexRealRotator{Float64},2)
 66 |     AMVW.vals!(r1, complex(1.0, 2.0), 2.0); AMVW.idx!(r1, 1)
 67 |     di = one(AMVW.ComplexRealRotator{Float64})
 68 |     AMVW.vals!(di, complex(cos(pi/3), sin(pi/3)), 0.0); AMVW.idx!(di, 2)
 69 |     M = A.as_full(r1, 3) * A.as_full(di, 3)  # here the rotator stands to the left of the diagonal one
 70 |     AMVW.Dflip(di, r1)
 71 |     dic = copy(di); AMVW.idx!(dic, AMVW.idx(r1))
 72 |     M1 = AMVW.as_full(dic, 3) * AMVW.as_full(di, 3) * AMVW.as_full(r1, 3) 
 73 |     @test maximum(norm.(M - M1)) <= 4eps()
 74 |     
 75 | end
 76 | 
 77 | @testset "Fuse" begin
 78 |     ##
 79 |     # fuse: ComplexComplexRotator, left -- r1 alone must reproduce r1*r2
 80 |     r1,r2 = ones(AMVW.ComplexComplexRotator{Float64},2)
 81 |     AMVW.vals!(r1, complex(1.0, 2.0), complex(2.0, 3.0)); AMVW.idx!(r1, 1)
 82 |     AMVW.vals!(r2, complex(3.0, 2.0), complex(5.0, 3.0)); AMVW.idx!(r2, 1)
 83 |     M = A.as_full(r1,2) * A.as_full(r2, 2)
 84 |     A.fuse(r1, r2, Val{:left})
 85 |     M1 = A.as_full(r1, 2)
 86 |     u = M - M1
 87 |     @test maximum(norm.(u)) <= 4eps()
 88 |     
 89 |     
 90 |     # ComplexRealRotator, left: r1 times the returned phase matrix must reproduce r1*r2
 91 |     r1,r2 = ones(AMVW.ComplexRealRotator{Float64},2)
 92 |     AMVW.vals!(r1, complex(1.0, 2.0), 2.0); AMVW.idx!(r1, 1)
 93 |     AMVW.vals!(r2, complex(3.0, 2.0), 5.0); AMVW.idx!(r2, 1)
 94 |     M = A.as_full(r1,2) * A.as_full(r2, 2)
 95 |     alpha = A.fuse(r1, r2, Val{:left})
 96 |     M1 = A.as_full(r1, 2) * diagm([alpha, conj(alpha)])
 97 |     u = M - M1
 98 |     @test maximum(norm.(u)) <= 4eps()
 99 |     
100 |     # ComplexRealRotator, right: the fused rotator is left in r2 instead
101 |     r1,r2 = ones(AMVW.ComplexRealRotator{Float64},2)
102 |     AMVW.vals!(r1, complex(1.0, 2.0), 2.0); AMVW.idx!(r1, 1)
103 |     AMVW.vals!(r2, complex(3.0, 2.0), 5.0); AMVW.idx!(r2, 1)
104 |     M = A.as_full(r1,2) * A.as_full(r2, 2)
105 |     alpha = A.fuse(r1, r2, Val{:right})
106 |     M1 =  A.as_full(r2, 2)  * diagm([alpha, conj(alpha)])
107 |     u = M - M1
108 |     @test maximum(norm.(u)) <= 4eps()
109 | 
110 | end
111 | 
112 | ## Cascade
113 | @testset "Cascade" begin
114 |     ## Checks D1 * Q2 * Q3 * Q4 == Q2 * Q3 * Q4 * diagm(D) once cascade has run.
115 |     D1, Q1, Q2, Q3, Q4 = ones(AMVW.ComplexRealRotator{Float64}, 5)
116 |     z = complex(1.0, -1.0)
117 |     alpha = z / norm(z)
118 |     AMVW.vals!(D1, alpha, 0.0); AMVW.idx!(D1, 1)
119 |     for (q, c, s, i) in ((Q2, complex(1.0, 2.0), 2.0, 2),
120 |                          (Q3, complex(3.0, 2.0), 5.0, 3),
121 |                          (Q4, complex(2.0, 2.0), 3.0, 4))
122 |         AMVW.vals!(q, c, s); AMVW.idx!(q, i)
123 |     end
124 |     before = A.as_full(D1, 5) * A.as_full(Q2, 5) * A.as_full(Q3, 5) * A.as_full(Q4, 5)
125 |     D = ones(Complex{Float64}, 5)
126 |     A.cascade([Q1, Q2, Q3, Q4], D, alpha, 1, 4)
127 |     after = A.as_full(Q2, 5) * A.as_full(Q3, 5) * A.as_full(Q4, 5) * diagm(D)
128 | 
129 |     @test maximum(norm.(before - after)) <= 4eps()
130 | 
131 | end
132 | 
133 | ##
134 | # turnover
135 | @testset "Turnover" begin
136 |     ## ComplexComplex: after a :right turnover the product must read r3 * r1 * r2
137 |     r1,r2,r3 = ones(AMVW.ComplexComplexRotator{Float64}, 3)
138 |     AMVW.vals!(r1, complex(1.0, 2.0), complex(2.0, 3.0)); AMVW.idx!(r1, 1)
139 |     AMVW.vals!(r2, complex(3.0, 2.0), complex(5.0, 3.0)); AMVW.idx!(r2, 2)
140 |     AMVW.vals!(r3, complex(4.0, 2.0), complex(6.0, 3.0)); AMVW.idx!(r3, 1)
141 |     
142 |     M = A.as_full(r1,3) * A.as_full(r2,3) * A.as_full(r3,3)
143 |     A.turnover(r1, r2, r3, Val{:right})
144 |     M1 =  A.as_full(r3,3) * A.as_full(r1,3) * A.as_full(r2,3)
145 |     u = M - M1
146 |     @test maximum(norm.(u)) <= 4eps()
147 |     
148 |     ## ComplexReal: the same :right check, followed by a :left turnover back
149 |     r1,r2,r3 = ones(AMVW.ComplexRealRotator{Float64}, 3)
150 |     AMVW.vals!(r1, complex(1.0, 2.0), 2.0); AMVW.idx!(r1, 1)
151 |     AMVW.vals!(r2, complex(3.0, 2.0), 5.0); AMVW.idx!(r2, 2)
152 |     AMVW.vals!(r3, complex(4.0, 2.0), 6.0); AMVW.idx!(r3, 1)
153 |     
154 |     M = A.as_full(r1,3) * A.as_full(r2,3) * A.as_full(r3,3)
155 |     A.turnover(r1, r2, r3, Val{:right})
156 |     M1 =  A.as_full(r3,3) * A.as_full(r1,3) * A.as_full(r2,3)
157 |     u = M - M1
158 |     @test maximum(norm.(u)) <= 4eps()
159 |     A.turnover(r3, r1, r2, Val{:left})
160 |     M2 = A.as_full(r1,3) * A.as_full(r2,3) * A.as_full(r3,3)
161 |     u = M - M2   # compare the :left result (M2); M1 was already checked above
162 |     @test maximum(norm.(u)) <= 4eps()
163 |     
164 | end
165 | 
166 | 
167 | # passthrough
168 | @testset "Passthrough" begin
169 |     ## Passing a diagonal phase matrix from the left of a rotator to its right
170 |     ## (updating both in place) must leave the overall product unchanged.
171 |     rot = one(AMVW.ComplexRealRotator{Float64})
172 |     AMVW.vals!(rot, complex(1.0, 2.0), 2.0)
173 |     AMVW.idx!(rot, 1)
174 |     angles = [pi/3, pi/4, pi/5]
175 |     D = [complex(cos(t), sin(t)) for t in angles]
176 |     lhs = diagm(D) * AMVW.as_full(rot, 3)
177 |     AMVW.passthrough(D, rot)
178 |     rhs = AMVW.as_full(rot, 3) * diagm(D)
179 |     @test maximum(norm.(lhs - rhs)) <= 4eps()
180 |     
181 | end
182 | 
183 | 
184 | @testset "poly_roots" begin
185 |     ## Every case builds a polynomial from known roots and expects them back to 1e-6
186 |     ## real coefficients
187 |     expected = [1.0, 2.0, 3.0]
188 |     coeffs = poly(expected).a
189 |     found = AMVW.poly_roots(coeffs)
190 |     found = sort(found, by=norm)
191 |     @test maximum(norm.(found - expected)) <= 1e-6
192 | 
193 |     ## complex coefficients
194 |     expected = [1.0, 2.0, 3.0, 4.0 + 1.0im]
195 |     coeffs = poly(expected).a
196 |     found = AMVW.poly_roots(coeffs)
197 |     found = sort(found, by=norm)
198 |     @test maximum(norm.(found - expected)) <= 1e-6
199 | 
200 |     ## ComplexComplex rotators: the state is set up and run by hand rather than
201 |     ## through poly_roots
202 |     expected = [1.0, 2.0, 3.0, 4.0+0im]
203 |     coeffs = poly(expected).a
204 |     qs, k = AMVW.reverse_poly(coeffs)
205 |     state = AMVW.ComplexComplexSingleShift(qs)
206 |     AMVW.init_state(state)
207 |     AMVW.AMVW_algorithm(state)
208 |     found = complex.(state.REIGS, state.IEIGS)
209 |     found = sort(found, by=norm)
210 |     @test maximum(norm.(found - expected)) <= 1e-6
211 | 
212 |     ## simple cases
213 |     ## n = 1: a single root, so nothing to sort
214 |     expected = [1.0]
215 |     coeffs = poly(expected).a
216 |     found = AMVW.poly_roots(coeffs)
217 |     @test maximum(norm.(found - expected)) <= 1e-6
218 | 
219 |     expected = [1.0 + im]
220 |     coeffs = poly(expected).a
221 |     found = AMVW.poly_roots(coeffs)
222 |     @test maximum(norm.(found - expected)) <= 1e-6
223 | 
224 |     ## n = 2
225 |     expected = [1.0, 2.0]
226 |     coeffs = poly(expected).a
227 |     found = AMVW.poly_roots(coeffs)
228 |     found = sort(found, by=norm)
229 |     @test maximum(norm.(found - expected)) <= 1e-6
230 | 
231 |     expected = [1.0, 2.0+im]
232 |     coeffs = poly(expected).a
233 |     found = AMVW.poly_roots(coeffs)
234 |     found = sort(found, by=norm)
235 |     @test maximum(norm.(found - expected)) <= 1e-6
236 |     
237 |     ## roots at zero; the expected roots are sorted by modulus before use
238 |     expected = [1.0, 2.0, 3.0, 0.0, 0.0]
239 |     expected = sort(expected, by=norm)
240 |     coeffs = poly(expected).a
241 |     found = AMVW.poly_roots(coeffs)
242 |     found = sort(found, by=norm)
243 |     @test maximum(norm.(found - expected)) <= 1e-6
244 | 
245 |     expected = [1.0, 2.0, 3.0+im, 0.0, 0.0]
246 |     expected = sort(expected, by=norm)
247 |     coeffs = poly(expected).a
248 |     found = AMVW.poly_roots(coeffs)
249 |     found = sort(found, by=norm)
250 |     @test maximum(norm.(found - expected)) <= 1e-6
251 |     
252 | end
253 | 


--------------------------------------------------------------------------------
/src/types.jl:
--------------------------------------------------------------------------------
  1 | ## Types
  2 | 
  3 | ## A container for our counters; the shift states create it as (0, 1, N-1, 0, N-2)
  4 | mutable struct AMVW_Counter
  5 |     zero_index::Int   # initial value 0
  6 |     start_index::Int  # initial value 1
  7 |     stop_index::Int   # initial value N-1
  8 |     it_count::Int     # initial value 0; presumably an iteration count -- confirm
  9 |     tr::Int           # initial value N-2
 10 | end
 11 | 
 12 | 
 13 | ## Rotators
 14 | 
 15 | ## Our rotators have field names c, s where c and s are either T or Complex{T}
 16 | @compat abstract type CoreTransform{T} end
 17 | @compat abstract type Rotator{T} <: CoreTransform{T} end   # parent of the three rotator types below
 18 | ## A rotator is treated as diagonal when |s| does not exceed eps(T)
 19 | is_diagonal{T}(r::Rotator{T}) = eps(T) >= abs(r.s)
 20 | 
 21 | ## copy: new rotator of the same concrete type as `a` (Rotator itself is abstract and cannot be constructed)
 22 | Base.copy(a::Rotator) = typeof(a)(a.c, a.s, a.i)
 23 | function Base.copy!(a::Rotator, b::Rotator)   # overwrite a with b's values and index
 24 |     vals!(a, vals(b)...)
 25 |     idx!(a, idx(b))
 26 | end
 27 | 
 28 | ## accessors: the (c, s) pair, the index, and the index setter
 29 | vals(r::Rotator) = (r.c, r.s)
 30 | idx(r::Rotator) = r.i
 31 | idx!(r::Rotator, i::Int) = (r.i = i)
 32 | 
 33 | 
 34 | 
 35 | 
 36 | # The index is superfluous for now, and a bit of a hassle to keep immutable,
 37 | # but might be of help later if twisting is approached. Shouldn't affect speed, but does mean 3N storage (Q, Ct, B)
 38 | # so may be
 39 | #
 40 | mutable struct RealRotator{T} <: Rotator{T}
 41 | c::T      # first rotator entry
 42 | s::T      # second rotator entry; is_diagonal tests |s| <= eps(T)
 43 | i::Int    # index, read and written through idx / idx!
 44 | end
 45 | 
 46 | ## Conjugate transpose of a real rotator: a new rotator with the same c and
 47 | ## index and with s negated; r itself is not modified.
 48 | Base.ctranspose(r::RealRotator) = RealRotator(r.c, -r.s, r.i)
 49 | 
 50 | ## Identity rotator (c = 1, s = 0, index 0) and a vector of N separately built identities
 51 | Base.one{T}(::Type{RealRotator{T}}) = RealRotator(one(T), zero(T), 0)
 52 | Base.ones{T}(S::Type{RealRotator{T}}, N) = [one(S) for k in 1:N]
 53 | 
 54 | ## set values: store (c, s) in r, rescaled to unit length unless already there
 55 | function vals!{T}(r::RealRotator, c::T, s::T)
 56 |     len = sqrt(c^2 + s^2)
 57 |     # heuristic on 6.3 on square roots: only divide when len is at least
 58 |     # 100 eps(T) away from 1, otherwise c and s are stored as given
 59 |     needs_scaling = norm(len - one(T)) >= 1e2*eps(T)
 60 |     scale = needs_scaling ? inv(len) : one(T)
 61 |     r.c = c * scale
 62 |     r.s = s * scale
 63 | end
 64 | 
 65 | ##################################################
 66 | ### Rotators with a complex c and a real s
 67 |     
 68 | mutable struct ComplexRealRotator{T} <: Rotator{T}
 69 | c::Complex{T}   # complex entry
 70 | s::T            # real entry; is_diagonal tests |s| <= eps(T)
 71 | i::Int          # index, read and written through idx / idx!
 72 | end
 73 | 
 74 | ## Conjugate transpose of a complex/real rotator: a new rotator with c
 75 | ## conjugated, s negated and the same index; r itself is not modified.
 76 | Base.ctranspose(r::ComplexRealRotator) = ComplexRealRotator(conj(r.c), -r.s, r.i)
 77 | ## Store (c, s) in r, rescaled so that |c|^2 + s^2 is 1 up to roundoff
 78 | function vals!{T}(r::ComplexRealRotator, c::Complex{T}, s::T)
 79 |     len = sqrt(abs(c * conj(c) + s^2))
 80 |     # heuristic on 6.3 on square roots: only divide when len differs from 1
 81 |     # by at least eps(T), otherwise c and s are stored as given
 82 |     needs_scaling = norm(len - one(T)) >= eps(T)
 83 |     scale = needs_scaling ? inv(len) : one(T)
 84 |     r.c = c * scale
 85 |     r.s = s * scale
 86 | end
 87 | ## Convenience methods: a complex s is reduced to its real part (the check
 88 | ## `abs(imag(s)) < 4eps(T)` is currently disabled) and a real c is promoted.
 89 | vals!{T}(r::ComplexRealRotator, c::Complex{T}, s::Complex{T}) = vals!(r, c, real(s))
 90 | vals!{T}(r::ComplexRealRotator{T}, c::T, s::T) =
 91 |     vals!(r, complex(c, zero(T)), s)
 92 | ## Identity rotator (c = 1 + 0im, s = 0, index 0) and a vector of N separately built identities
 93 | Base.one{T}(::Type{ComplexRealRotator{T}}) = ComplexRealRotator(one(Complex{T}), zero(T), 0)
 94 | Base.ones{T}(S::Type{ComplexRealRotator{T}}, N) = [one(S) for k in 1:N]
 95 | 
 96 | 
 97 | ## copy / copy! specialised for ComplexRealRotator
 98 | Base.copy(a::ComplexRealRotator) = ComplexRealRotator(a.c, a.s, a.i)
 99 | function Base.copy!(a::ComplexRealRotator, b::ComplexRealRotator)
100 |     vals!(a, b.c, b.s)   # goes through vals!, so the values are renormalised
101 |     idx!(a, b.i)
102 | end
103 |   
104 | 
105 | 
106 | 
107 | ##################################################
108 | ## We use two complex numbers, rather than 3 reals, here.
109 | ## Will be basically the same storage, as we don't need to include a D, but not quite (12N, not 11N)
110 |     
111 | mutable struct ComplexComplexRotator{T} <: Rotator{T}
112 | c::Complex{T}   # complex entry
113 | s::Complex{T}   # complex entry; is_diagonal tests |s| <= eps(T)
114 | i::Int          # index, read and written through idx / idx!
115 | end
116 | 
117 | ## Conjugate transpose of a complex/complex rotator: a new rotator with c
118 | ## conjugated, s negated and the same index; r itself is not modified.
119 | Base.ctranspose(r::ComplexComplexRotator) = ComplexComplexRotator(conj(r.c), -r.s, r.i)
120 | 
121 | ## Identity rotator (c = 1 + 0im, s = 0 + 0im, index 0) and a vector of N separately built identities
122 | Base.one{T}(::Type{ComplexComplexRotator{T}}) = ComplexComplexRotator(one(Complex{T}), zero(Complex{T}), 0)
123 | Base.ones{T}(S::Type{ComplexComplexRotator{T}}, N) = [one(S) for k in 1:N]
124 | 
125 | ## set values: store (c, s) in r, rescaled so that |c|^2 + |s|^2 is 1 up to roundoff
126 | function vals!{T}(r::ComplexComplexRotator, c::Complex{T}, s::Complex{T})
127 |     len = sqrt(abs(c * conj(c) + s * conj(s)))
128 |     # heuristic on 6.3 on square roots: only divide when len differs from 1
129 |     # by at least eps(T), otherwise c and s are stored as given
130 |     needs_scaling = norm(len - one(T)) >= eps(T)
131 |     scale = needs_scaling ? inv(len) : one(T)
132 |     r.c = c * scale
133 |     r.s = s * scale
134 | end
135 | vals!{T}(r::ComplexComplexRotator, c::Complex{T}, s::T) = vals!(r, c, complex(s, zero(T)))   # promote real s
136 | vals!{T}(r::ComplexComplexRotator{T}, c::T, s::T) = vals!(r, complex(c, zero(T)), complex(s, zero(T)))   # promote both
137 | 
138 | 
139 | 
140 | 
141 | 
142 | ### Shift Types
143 | ## ShiftType is the common parent of the algorithm states; RealDoubleShift is
144 | ## the state whose rotators are all RealRotator{T}.
145 | @compat abstract type ShiftType{T} end
146 | struct RealDoubleShift{T} <: ShiftType{T} 
147 |     N::Int                      # length of POLY (set by convert)
148 |     POLY::Vector{T}             # coefficient vector the state was built from
149 |     Q::Vector{RealRotator{T}}
150 |     Ct::Vector{RealRotator{T}}  # We use C', not C here
151 |     B::Vector{RealRotator{T}}
152 |     REIGS::Vector{T}            # real parts of the computed values, length N
153 |     IEIGS::Vector{T}            # imaginary parts of the computed values, length N
154 |     ## reusable storage
155 |     U::RealRotator{T}
156 |     Ut::RealRotator{T}
157 |     V::RealRotator{T}
158 |     Vt::RealRotator{T}
159 |     W::RealRotator{T}
160 |     A::Matrix{T}    # for parts of A = QR
161 |     Bk::Matrix{T}   # for diagonal block
162 |     R::Matrix{T}    # temp storage, sometimes R part of QR
163 |     e1::Vector{T}   # eigen values e1, e2
164 |     e2::Vector{T}
165 |     ctrs::AMVW_Counter   # mutable counters (the struct itself is immutable)
166 | end
167 | 
168 | ## Build a fresh real double-shift state for the coefficient vector ps:
169 | ## identity rotators, zeroed eigenvalue and scratch storage, initial counters.
170 | function Base.convert{T}(::Type{RealDoubleShift}, ps::Vector{T})
171 |     N = length(ps)
172 |     Rot = RealRotator{T}
173 | 
174 |     return RealDoubleShift(
175 |         N, ps,
176 |         ones(Rot, N), ones(Rot, N), ones(Rot, N),            # Q, Ct, B
177 |         zeros(T, N), zeros(T, N),                            # REIGS, IEIGS
178 |         one(Rot), one(Rot), one(Rot), one(Rot), one(Rot),    # U, Ut, V, Vt, W
179 |         zeros(T, 2, 2), zeros(T, 3, 2), zeros(T, 3, 2),      # A, Bk, R
180 |         zeros(T, 2), zeros(T, 2),                            # e1, e2
181 |         AMVW_Counter(0, 1, N-1, 0, N-2)                      # ctrs
182 |     )
183 | end
184 | 
185 | #######################################################
186 | ## State for ComplexReal type: rotators are ComplexRealRotator{T} and a
187 | ## separate vector D of complex phases is carried along.
188 | mutable struct ComplexRealSingleShift{T} <: ShiftType{T} 
189 |     N::Int                              # length of POLY (set by convert)
190 |     POLY::Vector{Complex{T}}            # coefficient vector the state was built from
191 |     Q::Vector{ComplexRealRotator{T}}
192 |     Ct::Vector{ComplexRealRotator{T}}  # We use C', not C here
193 | B::Vector{ComplexRealRotator{T}}
194 | D::Vector{Complex{T}}    # phases; created with length N+1, all ones
195 | Dp::Vector{Complex{T}}   # length-2 scratch vector (see the XXX note in convert)
196 |     REIGS::Vector{T}     # real parts of the computed values, length N
197 |     IEIGS::Vector{T}     # imaginary parts of the computed values, length N
198 |     ## reusable storage
199 | U::ComplexRealRotator{T}
200 | Ut::ComplexRealRotator{T}
201 | Di::ComplexRealRotator{T}
202 |     A::Matrix{Complex{T}}    # for parts of A = QR
203 |     Bk::Matrix{Complex{T}}   # for diagonal block
204 |     R::Matrix{Complex{T}}    # temp storage, sometimes R part of QR
205 |     e1::Vector{T}   # eigen values e1, e2, store as (re,imag)
206 | e2::Vector{T}
207 | ray::Bool   # shift selector; convert sets it to false (see the note there)
208 |     ctrs::AMVW_Counter
209 | end
210 | 
211 | ## Build a fresh ComplexReal state for the coefficient vector ps. All rotators
212 | ## start as identities, D and Dp as ones, everything else as zeros.
213 | function Base.convert{T}(::Type{ComplexRealSingleShift}, ps::Vector{Complex{T}})
214 |     N = length(ps)
215 |     Rot = ComplexRealRotator{T}
216 |     C = Complex{T}
217 |     return ComplexRealSingleShift(
218 |         N, ps,
219 |         ones(Rot, N), ones(Rot, N), ones(Rot, N),            # Q, Ct, B
220 |         ones(C, N+1),                                        # D
221 |         ones(C, 2),              # Dp ## XXX try to cut allocations in passthrough
222 |         zeros(T, N), zeros(T, N),                            # REIGS, IEIGS
223 |         one(Rot), one(Rot), one(Rot),                        # U, Ut, Di
224 |         zeros(C, 2, 2), zeros(C, 3, 2), zeros(C, 3, 2),      # A, Bk, R
225 |         zeros(T, 2), zeros(T, 2),                            # e1, e2
226 |         # ray -- the original note here reads: "true for Wilkinson, false for
227 |         # Rayleigh. Make adjustable!"
228 |         false,
229 |         AMVW_Counter(0, 1, N-1, 0, N-2))                     # ctrs
230 | end
231 | 
232 | ##################################################
233 | ## State for ComplexComplex Rotator type (no D)
234 | 
235 | struct ComplexComplexSingleShift{T} <: ShiftType{T} 
236 |     N::Int                                # length of POLY (set by convert)
237 |     POLY::Vector{Complex{T}}              # coefficient vector the state was built from
238 |     Q::Vector{ComplexComplexRotator{T}}
239 |     Ct::Vector{ComplexComplexRotator{T}}  # We use C', not C here
240 |     B::Vector{ComplexComplexRotator{T}}  
241 |     REIGS::Vector{T}                      # real parts of the computed values, length N
242 |     IEIGS::Vector{T}                      # imaginary parts of the computed values, length N
243 |     ## reusable storage
244 | U::ComplexComplexRotator{T}
245 | Ut::ComplexComplexRotator{T}
246 |     A::Matrix{Complex{T}}    # for parts of A = QR
247 |     Bk::Matrix{Complex{T}}   # for diagonal block
248 |     R::Matrix{Complex{T}}    # temp storage, sometimes R part of QR
249 |     e1::Vector{T}   # eigen values e1, e2, store as (re,imag)
250 | e2::Vector{T}
251 | ray::Bool   # shift selector; convert sets it to false (see the note there)
252 |     ctrs::AMVW_Counter   # mutable counters (the struct itself is immutable)
253 | end
254 | 
255 | ## Build a fresh ComplexComplex state for the coefficient vector ps:
256 | ## identity rotators, zeroed eigenvalue and scratch storage, initial counters.
257 | function Base.convert{T}(::Type{ComplexComplexSingleShift}, ps::Vector{Complex{T}})
258 |     N = length(ps)
259 |     Rot = ComplexComplexRotator{T}
260 |     C = Complex{T}
261 |     return ComplexComplexSingleShift(
262 |         N, ps,
263 |         ones(Rot, N), ones(Rot, N), ones(Rot, N),            # Q, Ct, B
264 |         zeros(T, N), zeros(T, N),                            # REIGS, IEIGS
265 |         one(Rot), one(Rot),                                  # U, Ut
266 |         zeros(C, 2, 2), zeros(C, 3, 2), zeros(C, 3, 2),      # A, Bk, R
267 |         zeros(T, 2), zeros(T, 2),                            # e1, e2
268 |         false,  # true for Wilkinson, false for Rayleigh
269 |         AMVW_Counter(0, 1, N-1, 0, N-2))                     # ctrs
270 | end
271 | 
272 | 


--------------------------------------------------------------------------------
/test/benchmark.jl:
--------------------------------------------------------------------------------
  1 | using AMVW
  2 | using BenchmarkTools
  3 | using Polynomials
  4 | using PolynomialRoots
  5 | using DataFrames
  6 | 
  7 | ## Accuracy measures for computed roots `rts` of the polynomial with roots `rs`:
  8 | ##   r0 = max |sorted rs - sorted rts|   (nonsensical for some cases)
  9 | ##   r1 = max |P(lambda)/P'(lambda)|
 10 | ##   r2 = max |P(lambda)/P'(lambda)/lambda|
 11 | function _residual_check(rs, rts)
 12 |     p = poly(rs)
 13 |     dp = polyder(p)
 14 |     newton = p.(rts) ./ dp.(rts)      # P/P' evaluated at every computed root
 15 |     r0 = maximum(norm.(sort(rs, by=norm) - sort(rts, by=norm)))
 16 |     r1 = maximum(norm.(newton))
 17 |     r2 = maximum(norm.(newton ./ rts))
 18 |     return [r0, r1, r2]
 19 | end
 20 | 
 21 | ## One column of [r0, r1, r2] per root finder, for the polynomial with roots rs
 22 | function residual_check(rs)
 23 |     p = poly(rs)
 24 |     table = DataFrame(Polynomials=zeros(3),AMVW=zeros(3),PolynomialRoots=zeros(3))
 25 |     table[:,1] = _residual_check(rs, Polynomials.roots(p))
 26 |     table[:,2] = _residual_check(rs, AMVW.poly_roots(p.a))
 27 |     table[:,3] = _residual_check(rs, PolynomialRoots.roots(p.a))
 28 | 
 29 |     return table
 30 | end
 31 | 
 32 | 
 33 | # small n, real: 10 equally spaced roots from 1/n to 1
 34 | n = 10
 35 | rs = linspace(1/n, 1, n)
 36 | p = poly(rs)   # polynomial built from the roots rs
 37 | 
 38 | @benchmark Polynomials.roots(p)   # recorded output follows as comments
 39 | # julia> @benchmark Polynomials.roots(p)
 40 | # BenchmarkTools.Trial: 
 41 | #   memory estimate:  8.20 KiB
 42 | #   allocs estimate:  50
 43 | #   --------------
 44 | #   minimum time:     98.108 μs (0.00% GC)
 45 | #   median time:      102.220 μs (0.00% GC)
 46 | #   mean time:        108.525 μs (1.16% GC)
 47 | #   maximum time:     3.642 ms (94.16% GC)
 48 | #   --------------
 49 | #   samples:          10000
 50 | #   evals/sample:     1
 51 | #   time tolerance:   5.00%
 52 | #   memory tolerance: 1.00%
 53 | 
 54 | @benchmark AMVW.poly_roots(p.a)
 55 | # julia> @benchmark AMVW.poly_roots(p.a)
 56 | # BenchmarkTools.Trial: 
 57 | #   memory estimate:  4.27 KiB
 58 | #   allocs estimate:  66
 59 | #   --------------
 60 | #   minimum time:     93.309 μs (0.00% GC)
 61 | #   median time:      150.606 μs (0.00% GC)
 62 | #   mean time:        155.830 μs (0.26% GC)
 63 | #   maximum time:     4.237 ms (94.03% GC)
 64 | #   --------------
 65 | #   samples:          10000
 66 | #   evals/sample:     1
 67 | #   time tolerance:   5.00%
 68 | #   memory tolerance: 1.00%
 69 | 
 70 | 
 71 | 
 72 | 
 73 | @benchmark PolynomialRoots.roots(p.a)
 74 | 
 75 | # with deprecation warnings
 76 | # BenchmarkTools.Trial: 
 77 | #   memory estimate:  12.20 KiB
 78 | #   allocs estimate:  77
 79 | #   --------------
 80 | #   minimum time:     229.386 μs (0.00% GC)
 81 | #   median time:      241.773 μs (0.00% GC)
 82 | #   mean time:        253.786 μs (0.62% GC)
 83 | #   maximum time:     8.150 ms (0.00% GC)
 84 | #   --------------
 85 | #   samples:          10000
 86 | #   evals/sample:     1
 87 | #   time tolerance:   5.00%
 88 | #   memory tolerance: 1.00%
 89 | 
 90 | # Complex polynomials: roots 1+im, 2+im, ..., 6+im
 91 | rs = [x+im for x in 1.0 : 6]
 92 | p = poly(rs)
 93 | 
 94 | @benchmark Polynomials.roots(p)   # recorded output follows as comments
 95 | 
 96 | # julia> @benchmark Polynomials.roots(p)
 97 | # BenchmarkTools.Trial: 
 98 | #   memory estimate:  5.59 KiB
 99 | #   allocs estimate:  52
100 | #   --------------
101 | #   minimum time:     33.073 μs (0.00% GC)
102 | #   median time:      34.435 μs (0.00% GC)
103 | #   mean time:        36.119 μs (1.96% GC)
104 | #   maximum time:     3.713 ms (95.31% GC)
105 | #   --------------
106 | #   samples:          10000
107 | #   evals/sample:     1
108 | #   time tolerance:   5.00%
109 | #   memory tolerance: 1.00%
110 | 
111 | @benchmark AMVW.poly_roots(p.a)
112 | # BenchmarkTools.Trial: 
113 | #   memory estimate:  4.66 KiB
114 | #   allocs estimate:  56
115 | #   --------------
116 | #   minimum time:     102.511 μs (0.00% GC)
117 | #   median time:      143.153 μs (0.00% GC)
118 | #   mean time:        157.052 μs (0.70% GC)
119 | #   maximum time:     49.993 ms (0.00% GC)
120 | #   --------------
121 | #   samples:          10000
122 | #   evals/sample:     1
123 | #   time tolerance:   5.00%
124 | # memory tolerance: 1.00%
125 | 
126 | @benchmark PolynomialRoots.roots(p.a)
127 | # BenchmarkTools.Trial: 
128 | #   memory estimate:  11.63 KiB
129 | #   allocs estimate:  76
130 | #   --------------
131 | #   minimum time:     226.424 μs (0.00% GC)
132 | #   median time:      232.025 μs (0.00% GC)
133 | #   mean time:        248.389 μs (0.89% GC)
134 | #   maximum time:     7.717 ms (0.00% GC)
135 | #   --------------
136 | #   samples:          10000
137 | #   evals/sample:     1
138 | #   time tolerance:   5.00%
139 | # memory tolerance: 1.00%
140 | 
141 | ## XXX We have an issue with this one though (roots on the unit circle)
142 | n = 10
143 | ts = linspace(1/n, 1.0, n) * 2pi   # n angles from 2pi/n up to 2pi
144 | rs = [complex(cos(t), sin(t)) for t in ts]
145 | p = poly(rs) 
146 | 
147 | @benchmark Polynomials.roots(p)
148 | # julia> @benchmark PolynomialRoots.roots(p.a)   -- NOTE(review): this label and the figures below name PolynomialRoots, not the Polynomials.roots call above; confirm which run was pasted
149 | # BenchmarkTools.Trial: 
150 | #   memory estimate:  11.95 KiB
151 | #   allocs estimate:  76
152 | #   --------------
153 | #   minimum time:     301.440 μs (0.00% GC)
154 | #   median time:      306.313 μs (0.00% GC)
155 | #   mean time:        326.734 μs (0.61% GC)
156 | #   maximum time:     4.879 ms (91.35% GC)
157 | #   --------------
158 | #   samples:          10000
159 | #   evals/sample:     1
160 | #   time tolerance:   5.00%
161 | #   memory tolerance: 1.00%
162 | 
163 | @benchmark AMVW.poly_roots(p.a) #  use state.ray=false
164 | # ## XXX Still misses a lot
165 | # BenchmarkTools.Trial: 
166 | #   memory estimate:  5.88 KiB
167 | #   allocs estimate:  68
168 | #   --------------
169 | #   minimum time:     354.586 μs (0.00% GC)
170 | #   median time:      401.278 μs (0.00% GC)
171 | #   mean time:        443.340 μs (0.30% GC)
172 | #   maximum time:     6.563 ms (90.24% GC)
173 | #   --------------
174 | #   samples:          10000
175 | #   evals/sample:     1
176 | #   time tolerance:   5.00%
177 | #   memory tolerance: 1.00%
178 | 
179 | 
180 | @benchmark PolynomialRoots.roots(p.a)
181 | 
182 | # julia> @benchmark PolynomialRoots.roots(p.a)
183 | # BenchmarkTools.Trial: 
184 | #   memory estimate:  11.95 KiB
185 | #   allocs estimate:  76
186 | #   --------------
187 | #   minimum time:     302.533 μs (0.00% GC)
188 | #   median time:      309.085 μs (0.00% GC)
189 | #   mean time:        330.070 μs (0.60% GC)
190 | #   maximum time:     5.024 ms (88.43% GC)
191 | #   --------------
192 | #   samples:          10000
193 | #   evals/sample:     1
194 | #   time tolerance:   5.00%
195 | #   memory tolerance: 1.00%
196 | 
197 | # "Big" polynomials: same 10 roots as the first case, but as BigFloat
198 | n = 10
199 | rs = linspace(1/n, big(1.0), n)
200 | 
201 | p = poly(rs) 
202 | 
203 | @benchmark Polynomials.roots(p)
204 | ## Error (no support for big)
205 | 
206 | @benchmark AMVW.poly_roots(p.a)
207 | # julia> @benchmark AMVW.poly_roots(p.a)
208 | # BenchmarkTools.Trial: 
209 | #   memory estimate:  10.14 MiB
210 | #   allocs estimate:  204777
211 | #   --------------
212 | #   minimum time:     26.071 ms (0.00% GC)
213 | #   median time:      56.661 ms (33.13% GC)
214 | #   mean time:        52.697 ms (24.94% GC)
215 | #   maximum time:     74.463 ms (25.16% GC)
216 | #   --------------
217 | #   samples:          95
218 | #   evals/sample:     1
219 | #   time tolerance:   5.00%
220 | #   memory tolerance: 1.00%
221 | 
222 | # julia> maximum(norm.(sort(AMVW.poly_roots(p.a), by=norm) - sort(rs, by=norm)))
223 | # 3.542606431077360733112308774961528146389295822293876354568756300943283335489686e-71
224 | 
225 | @benchmark PolynomialRoots.roots(p.a) 
226 | # BenchmarkTools.Trial: 
227 | #   memory estimate:  1.25 MiB
228 | #   allocs estimate:  26226
229 | #   --------------
230 | #   minimum time:     1.669 ms (0.00% GC)
231 | #   median time:      1.873 ms (0.00% GC)
232 | #   mean time:        3.034 ms (36.15% GC)
233 | #   maximum time:     26.625 ms (91.54% GC)
234 | #   --------------
235 | #   samples:          1621
236 | #   evals/sample:     1
237 | #   time tolerance:   5.00%
238 | #   memory tolerance: 1.00%
239 | 
240 | # julia> maximum(norm.(sort(PolynomialRoots.roots(p.a), by=norm) - sort(rs, by=norm)))
241 | # 1.154958001715555551976043766371611436643198308737886091026144983582996624795927e-72
242 | 
243 | ##################################################
244 | 
245 | # larger n (50): 50 equally spaced real roots from 1/n to 1
246 | 
247 | n = 50
248 | rs = linspace(1/n, 1, n)
249 | p = poly(rs)
250 | 
251 | @benchmark Polynomials.roots(p)   # recorded output follows as comments
252 | # julia> @benchmark Polynomials.roots(p)
253 | # BenchmarkTools.Trial: 
254 | #   memory estimate:  61.78 KiB
255 | #   allocs estimate:  82
256 | #   --------------
257 | #   minimum time:     1.006 ms (0.00% GC)
258 | #   median time:      1.073 ms (0.00% GC)
259 | #   mean time:        1.128 ms (0.57% GC)
260 | #   maximum time:     4.822 ms (65.08% GC)
261 | #   --------------
262 | #   samples:          4306
263 | #   evals/sample:     1
264 | #   time tolerance:   5.00%
265 | #   memory tolerance: 1.00%
266 | 
267 | 
268 | @benchmark AMVW.poly_roots(p.a)
269 | # julia> @benchmark AMVW.poly_roots(p.a)
270 | # BenchmarkTools.Trial: 
271 | #   memory estimate:  12.73 KiB
272 | #   allocs estimate:  186
273 | #   --------------
274 | #   minimum time:     1.556 ms (0.00% GC)
275 | #   median time:      2.432 ms (0.00% GC)
276 | #   mean time:        2.515 ms (0.08% GC)
277 | #   maximum time:     8.164 ms (46.20% GC)
278 | #   --------------
279 | #   samples:          1962
280 | #   evals/sample:     1
281 | #   time tolerance:   5.00%
282 | #   memory tolerance: 1.00%
283 | 
284 | @benchmark PolynomialRoots.roots(p.a)
285 | # julia> @benchmark PolynomialRoots.roots(p.a)
286 | # BenchmarkTools.Trial: 
287 | #   memory estimate:  14.72 KiB
288 | #   allocs estimate:  77
289 | #   --------------
290 | #   minimum time:     422.714 μs (0.00% GC)
291 | #   median time:      426.969 μs (0.00% GC)
292 | #   mean time:        446.475 μs (0.53% GC)
293 | #   maximum time:     4.512 ms (88.61% GC)
294 | #   --------------
295 | #   samples:          10000
296 | #   evals/sample:     1
297 | #   time tolerance:   5.00%
298 | #   memory tolerance: 1.00%
299 | 
300 | ##################################################
301 | 
302 | # residual check: compare the three root finders on n equally spaced real roots
303 | n = 10;
304 | rs = linspace(1/n, 1, n);
305 | p = poly(rs);   # not used by residual_check(rs), which builds its own polynomial
306 | residual_check(rs)
307 | 
308 | # julia> residual_check(rs)
309 | # 3×3 DataFrames.DataFrame
310 | # │ Row │ Polynomials │ AMVW       │ PolynomialRoots │
311 | # ├─────┼─────────────┼────────────┼─────────────────┤
312 | # │ 1   │ 2.09855e-10 │ 1.18857e-9 │ 5.75882e-11     │
313 | # │ 2   │ 1.4072e-10  │ 1.13841e-9 │ 3.38759e-11     │
314 | # │ 3   │ 2.01029e-10 │ 2.06593e-9 │ 4.23449e-11     │
315 | 
316 | n = 15;   # same check with 15 equally spaced real roots
317 | rs = linspace(1/n, 1, n);
318 | p = poly(rs);   # not used by residual_check(rs), which builds its own polynomial
319 | residual_check(rs)
320 | 
321 | # julia> residual_check(rs)
322 | # 3×3 DataFrames.DataFrame
323 | # │ Row │ Polynomials │ AMVW       │ PolynomialRoots │
324 | # ├─────┼─────────────┼────────────┼─────────────────┤
325 | # │ 1   │ 2.23049e-6  │ 2.742e-5   │ 1.25911e-7      │
326 | # │ 2   │ 2.30464e-6  │ 2.74255e-5 │ 2.07169e-7      │
327 | # │ 3   │ 3.14269e-6  │ 4.83827e-5 │ 3.10753e-7      │
328 | 
329 | 
330 | n = 50;   # same check with 50 equally spaced real roots
331 | rs = linspace(1/n, 1, n);
332 | p = poly(rs);   # not used by residual_check(rs), which builds its own polynomial
333 | residual_check(rs)
334 | 
335 | # julia> residual_check(rs)
336 | # 3×3 DataFrames.DataFrame
337 | # │ Row │ Polynomials │ AMVW       │ PolynomialRoots │
338 | # ├─────┼─────────────┼────────────┼─────────────────┤
339 | # │ 1   │ 0.943682    │ 0.640697   │ 0.594116        │
340 | # │ 2   │ 0.0213801   │ 0.0183637  │ 0.0446998       │
341 | # │ 3   │ NaN         │ 1.93315e10 │ 0.0488876       │
342 | 
343 | n = 10   # same check with 10 roots on the unit circle
344 | ts = linspace(1/n, 1.0, n) * 2pi
345 | rs = [complex(cos(t), sin(t)) for t in ts]
346 | p = poly(rs)    # not used by residual_check(rs), which builds its own polynomial
347 | residual_check(rs)
348 | 
349 | # 3×3 DataFrames.DataFrame
350 | # │ Row │ Polynomials │ AMVW        │ PolynomialRoots │
351 | # ├─────┼─────────────┼─────────────┼─────────────────┤
352 | # │ 1   │ 1.90211     │ 1.90211     │ 1.90211         │
353 | # │ 2   │ 3.39034e-15 │ 3.52069e-15 │ 3.5958e-16      │
354 | # │ 3   │ 3.39034e-15 │ 3.52069e-15 │ 3.5958e-16      │
355 | 
356 | 


--------------------------------------------------------------------------------
/src/factorization.jl:
--------------------------------------------------------------------------------
  1 | ## 
  2 | ## initial factorization
  3 | ## This is for complex real where we have a D matrix for phases
  4 | function QDCB_factorization{T}(state::ComplexRealSingleShift{T})
  5 |     ## Fills state.Q, state.Ct, state.B and state.D in place from state.POLY.
  6 |     N, ps= state.N, state.POLY
  7 |     par = iseven(N) ? one(T) : -one(T)
  8 | 
  9 |     
 10 |     Q, Ct, B = state.Q, state.Ct, state.B
 11 |     fill!(state.D, one(Complex{T}))   # reset every phase to 1 without a temporary array
 12 |     
 13 |     ## Q[1], ..., Q[N-1] are set from (c, s) = (0, 1); Q[N] from (1, 0)
 14 |     for ii = 1:(N-1)
 15 |         vals!(Q[ii], zero(Complex{T}), one(T))
 16 |         idx!(Q[ii], ii)
 17 |     end
 18 |     vals!(Q[N], one(Complex{T}), zero(T))
 19 |     idx!(Q[N], N)
 20 | 
 21 | 
 22 |     ## Working, but not quite what is in DFCC code
 23 |     ## there -par*ps[N], par*one(T); C is -conj(c), -s
 24 |     ## B[N] = par, -par...
 25 |     c, s, temp = givensrot(par * conj(ps[N]), -par * one(T)) # <<<- conj(ps[N])!!
 26 | 
 27 |     nrm = norm(c)
 28 |     alpha = c/nrm   # phase of c; it is moved into D[N], D[N+1] below
 29 |     
 30 |     vals!(Ct[N], conj(c), -s);
 31 |     idx!(Ct[N], N)
 32 | 
 33 |     vals!(B[N], -par*s*alpha, par*nrm)
 34 |     idx!(B[N], N)
 35 |     
 36 |     state.D[N] = alpha
 37 |     state.D[N+1] = conj(alpha)
 38 |     
 39 |     for ii in 2:N
 40 |         c, s, temp = givensrot(-ps[ii-1], temp)
 41 |         vals!(Ct[N-ii + 1], conj(c*alpha), -s)  # note alpha
 42 |         idx!(Ct[N-ii+1], N-ii+1)
 43 |         
 44 |         vals!(B[N-ii + 1], c*alpha, s)          # note alpha
 45 |         idx!(B[N-ii+1], N-ii+1)
 46 |     end
 47 | end
 48 | ## ComplexReal states are initialised through the QDCB factorization
 49 | init_state(state::ComplexRealSingleShift) = QDCB_factorization(state)
 50 | ## 
 51 | ## initial factorization for
 52 | ## RealDoubleShift and ComplexComplex
 53 | function QCB_factorization{T}(state::ShiftType{T})
 54 |     ## Fills state.Q, state.Ct and state.B in place from state.POLY.
 55 |     N, ps= state.N, state.POLY
 56 |     par = iseven(N) ? one(T) : -one(T)
 57 | 
 58 |     ## Rotator entries are real for RealDoubleShift and complex otherwise.
 59 |     if isa(state, RealDoubleShift)
 60 |         ZERO, ONE = zero(T), one(T)
 61 |     else
 62 |         ZERO, ONE = zero(Complex{T}), one(Complex{T})
 63 |     end
 64 |     ## (plain locals: `const` on a local variable is rejected by newer Julia)
 65 | 
 66 |     
 67 |     Q, Ct, B = state.Q, state.Ct, state.B
 68 | 
 69 |     for ii = 1:(N-1)
 70 |         vals!(Q[ii], ZERO, ONE)
 71 |         idx!(Q[ii], ii)
 72 |     end
 73 |     vals!(Q[N], ONE, ZERO)
 74 |     idx!(Q[N], N)
 75 | 
 76 | 
 77 |     ## Working, but not quite what is in DFCC code
 78 |     ## there -par*ps[N], par*one(T); C is -conj(c), -s
 79 |     ## B[N] = par, -par...
 80 |     c, s, temp = givensrot(par * ps[N], -par * one(T))
 81 |     vals!(Ct[N], conj(c), -s); idx!(Ct[N], N)
 82 | 
 83 |     vals!(B[N], -par*s, par*conj(c))     
 84 |     idx!(B[N], N)
 85 |     
 86 |     for ii in 2:N
 87 |         c, s, temp = givensrot(-ps[ii-1], temp)
 88 |         vals!(Ct[N-ii + 1], conj(c), -s)
 89 |         idx!(Ct[N-ii+1], N-ii+1)
 90 |         
 91 |         vals!(B[N-ii + 1], c, s)
 92 |         idx!(B[N-ii+1], N-ii+1)
 93 |     end
 94 | 
 95 | end
 96 | ## Every other shift type is initialised through the QCB factorization
 97 | init_state(state::ShiftType) = QCB_factorization(state)
 98 | # If there is an issue, this function can be used to restart the algorithm:
 99 | # it redoes the factorization, zeroes the stored eigenvalues and resets the
100 | # counters to their initial values. (Could be merged with init_state?)
101 | function restart{T}(state::ShiftType{T})
102 |     init_state(state)
103 |     N, ctrs = state.N, state.ctrs
104 |     for k in 1:N
105 |         state.IEIGS[k] = zero(T)
106 |         state.REIGS[k] = zero(T)
107 |     end
108 |     ctrs.zero_index, ctrs.start_index = 0, 1
109 |     ctrs.stop_index = N - 1
110 |     ctrs.it_count = 0
111 |     ctrs.tr = N - 2
112 | end
113 | 
114 | 
115 | ### Related to the decomposition of QR into QC(B + ...)
116 | 
117 | 
118 | ## we need to find A[k:k+2, k:k+1] for purposes of computing eigenvalues, either
119 | ## to give the shifts or to find the roots after deflation.
120 | ##
121 | ## fill A[k:k+2, k:k+1] k in 2:N
122 | ## updates state.A
123 | ##
124 | # We look for r_j,k. Depending on |j-k| there are different amounts of work
125 | # we have wk = (B + e1 y^t) * ek = B * ek + e1 yk; we consider B* ek only B1 ... Bk D ek applies
126 | #
127 | 
128 | # julia> Bi*Bj*Bk * [0,0,1,0]
129 | # julia> rotm(bi1, bi2, 1, 4) * rotm(bj1, bj2, 2, 4) * rotm(bk1, bk2, 3, 4) * [0,0,1,0]
130 | # 4-element Array{SymPy.Sym,1}
131 | # ⎡       ___ ___ ⎤
132 | # ⎢bk₁⋅bi₂⋅bj₂ ⎥
133 | # ⎢               ⎥
134 | # ⎢        ___ ___⎥
135 | # ⎢-bk₁⋅bi₁⋅bj₂⎥
136 | # ⎢               ⎥
137 | # ⎢         ___   ⎥
138 | # ⎢  bk₁⋅bj₁   ⎥
139 | # ⎢               ⎥
140 | # ⎣    bk₂     ⎦
141 | # which gives [what, wj, wk, wl]
142 | 
143 | # For rkk, we have Ck * W = [rkk, 0]
144 | # @vars ck1 ck2 what w1
145 | # u = rotm(ck1, ck2, 1,2) * [what, w1]
146 | # u[1](what => solve(u[2], what)[1]) |> simplify
147 | #     ⎛    ___      2⎞ 
148 | # -w₁⋅⎝ck₁⋅ck₁ + ck₂ ⎠ 
149 | # ───────────────────── this is rkk = -w1/ck_s 
150 | #          ck₂    
151 | # 
152 | # For r[k-1, k] we need to do more work. We need [what_{k-1}, w_k, w_{k+1}], where w_k, w_{k+1} found from B values as above.
153 | #
154 | # julia> @vars ck1 ck2 cj1 cj2 what w w1
155 | # (ck1, ck2, cj1, cj2, what, w, w1)
156 | 
157 | # julia> u = rotm(ck1, ck2, 2, 3) * rotm(cj1, cj2, 1, 3) * [what, w, w1]  # C^*_{k} * C^*{k-1} * W = [r_{k-1,k}, r_{k,k}, 0]
158 | # 3-element Array{SymPy.Sym,1}
159 | # ⎡        cj₁⋅ŵ - cj₂⋅w         ⎤
160 | # ⎢                               ⎥
161 | # ⎢                __         ⎥
162 | # ⎢cj₂⋅ck₁⋅ŵ + ck₁⋅w⋅cj₁ - ck₂⋅w₁⎥
163 | # ⎢                               ⎥
164 | # ⎢               ___      ___⎥
165 | # ⎣cj₂⋅ck₂⋅ŵ + ck₂⋅w⋅cj₁ + w₁⋅ck₁⎦
166 | 
167 | 
168 | 
169 | # julia> u[1](what => solve(u[3], what)[1]) |> simplify
170 | #      2                       
171 | #   cj₁ ⋅w   cj₁⋅ck₁⋅w₁        
172 | # - ────── - ────────── - cj₂⋅w
173 | #    cj₂      cj₂⋅ck₂
174 | 
175 | ## or -(w + cj * ck/sk * w1) / sj
176 | 
177 | #
178 | # For r_{k-2,k} we need to reach back one more step
179 | # C^*_{k} * C^*{k-1} * C^*_{k-2} W = [r_{k-2,k} r_{k-1,k}, r_{k,k}, 0]
180 | #
181 | # julia> @vars ck1 ck2 cj1 cj2 ci1 ci2 what wm1 w w1
182 | # julia> u = rotm(ck1, ck2, 3, 4) * rotm(cj1, cj2, 2, 4) * rotm(ci1, ci2, 1, 4) * [what, wm1, w, w1]
183 | # julia> u[1](what => solve(u[4], what)[1]) |> simplify
184 | #      2                                        
185 | #   ci₁ ⋅wm₁   ci₁⋅cj₁⋅w    ci₁⋅ck₁⋅w₁          
186 | # - ──────── - ───────── - ─────────── - ci₂⋅wm₁
187 | #     ci₂       ci₂⋅cj₂    ci₂⋅cj₂⋅ck₂                  
188 | #
 189 | # or -(wm1 + (ci*cj/sj)*w + (ci*ck) / (sj * sk) * w1) / si
190 | #
 191 | # This will have problems if any of si, sj or sk is 0. This happens if the
 192 | # Ct[k] become trivial. Theorem 4.1 ensures this can't happen mathematically,
 193 | # though numerically this is a different matter. The bound involves 1/||p||, which can be smaller than machine precision for, say, Wilkinson(20).
194 | #
195 | 
196 | 
# D value for position k. Only ComplexRealSingleShift stores D values; for every
# other shift type the value is `one(T)`.
function getD(state::ComplexRealSingleShift, k::Int)
    return state.D[k]
end

function getD{T}(state::ShiftType{T}, k::Int)
    return one(T)
end
200 | 
## Fill state.A[1:2, 1:2] with the 2x2 block, belonging to positions k-1 and k, that is
## used to compute shifts and eigenvalues; k must lie in 2:state.N. The needed entries
## of the triangular factor are recovered from the B and Ct rotators (formulas derived
## in the comments preceding this function), kept in the scratch matrix state.R, scaled
## by the D values and finally combined with the Q rotators.
## Always returns `false`.
##
## Naming: suffixes i, j, k refer to the rotators at positions k-2, k-1, k. For each of
## B, Ct (written C) and Q, `c..`/`s..` are the first/second value returned by `vals`.
## The formulas divide by sCi, sCj, sCk, so they break down if a Ct rotator is trivial.
function diagonal_block{T}(state::ShiftType{T}, k)
    (2 <= k <= state.N) || error("$k not in [2,n]")

    A, R = state.A, state.R          # R is temporary storage
    Q, Ct, B = state.Q, state.Ct, state.B

    # the rotators at k-1 and k are needed in both cases
    cBj, sBj = vals(B[k-1])
    cBk, sBk = vals(B[k])
    cCj, sCj = vals(Ct[k-1])
    cCk, sCk = vals(Ct[k])
    cQj, sQj = vals(Q[k-1])
    cQk, sQk = vals(Q[k])

    if k == 2
        # Here only [r11 r12; 0 r22] is needed.
        # Column k: r_kk = -w_{k+1} / sCk and
        # r_{k-1,k} = -(w_k + cCj * conj(cCk) / sCk * w_{k+1}) / sCj
        w_next = sBk
        w_here = conj(cBj) * cBk
        R[2,2] = - w_next / sCk
        R[1,2] = - (w_here + cCj * conj(cCk) / sCk * w_next) / sCj

        # column k-1: the same rule one position earlier
        R[1,1] = - sBj / sCj
        R[2,1] = complex(zero(T))

        # scale the rows by the D values at k-1 and k
        dj, dk = getD(state, k-1), getD(state, k)
        for col in 1:2
            R[1,col] *= dj
            R[2,col] *= dk
        end

        # combine with the Q rotators:
        #   [ R11*qjc   R12*qjc - R22*qkc*conj(qjs) ]
        #   [ R11*qjs   R12*qjs + R22*qkc*conj(qjc) ]
        A[1,1] = R[1,1] * cQj
        A[2,1] = R[1,1] * sQj
        A[1,2] = R[1,2] * cQj - R[2,2] * cQk * conj(sQj)
        A[2,2] = R[1,2] * sQj + R[2,2] * cQk * conj(cQj)
        return false
    end

    # general case: one more rotator (position k-2) of each kind is involved
    cBi, sBi = vals(B[k-2])
    cCi, sCi = vals(Ct[k-2])
    cQi, sQi = vals(Q[k-2])

    # column k, stored in column 2 of R (rows 1:3 stand for k-2, k-1, k)
    w_next = sBk
    w_here = conj(cBj) * cBk
    w_prev = - conj(cBi) * conj(sBj) * cBk
    R[3,2] = - w_next / sCk
    R[2,2] = - (w_here + cCj * conj(cCk) / sCk * w_next) / sCj
    R[1,2] = -(w_prev + cCi * conj(cCj) / sCj * w_here +
               cCi * conj(cCk) / (sCj * sCk) * w_next) / sCi

    # column k-1, stored in column 1 of R: same formulas with all indices moved up by one
    w_next = sBj
    w_here = conj(cBi) * cBj
    R[2,1] = - w_next / sCj
    R[1,1] = - (w_here + cCi * conj(cCj) / sCj * w_next) / sCi
    R[3,1] = zero(T)

    # scale the rows by the D values at k-2, k-1 and k
    di, dj, dk = getD(state, k-2), getD(state, k-1), getD(state, k)
    for col in 1:2
        R[1,col] *= di
        R[2,col] *= dj
        R[3,col] *= dk
    end

    # combine with the Q rotators (written with the storage indices of R):
    #   [ R11*qis + R21*conj(qic)*qjc   R12*qis + R22*conj(qic)*qjc
    #                                           - R32*conj(qic)*conj(qjs)*qkc ]
    #   [ R21*qjs                       R22*qjs + R32*conj(qjc)*qkc           ]
    A[1,1] = R[1,1] * sQi + R[2,1] * conj(cQi) * cQj
    A[2,1] = R[2,1] * sQj
    A[1,2] = R[1,2] * sQi + R[2,2] * conj(cQi) * cQj -
             R[3,2] * conj(cQi) * conj(sQj) * cQk
    A[2,2] = R[2,2] * sQj + R[3,2] * conj(cQj) * cQk
    return false
end
310 | 
311 | ##################################################
312 | 
# Eigenvalues of the 2x2 block held in state.A[1:2, 1:2] (real double-shift case).
# [a11 - l a12; a21 a22 - l] has characteristic polynomial
#   l^2 - 2 * (tr(A)/2) * l + det(A),
# so the `qdrtc` routine is called with b = tr(A)/2 and c = det(A).
# The four values returned by `qdrtc` are stored in state.e1[1:2] and state.e2[1:2];
# the function returns the pair complex(e1[1], e1[2]), complex(e2[1], e2[2]).
function eigen_values{T}(state::RealDoubleShift{T})
    A = state.A
    a11, a12 = A[1,1], A[1,2]
    a21, a22 = A[2,1], A[2,2]

    # Halve by dividing by the integer 2 rather than multiplying by the Float64 literal
    # 0.5: the value is the same, but b keeps the element type of state.A (e.g. Float32)
    # instead of being promoted to Float64 before it is handed to `qdrtc`.
    b = (a11 + a22) / 2
    c = a11 * a22 - a12 * a21

    state.e1[1], state.e1[2], state.e2[1], state.e2[2] = qdrtc(one(T), b, c)
    return complex(state.e1[1], state.e1[2]), complex(state.e2[1], state.e2[2])
end
326 | 
# Eigenvalues of the 2x2 block held in state.A[1:2, 1:2]; from `modified_quadratic.f90`.
# Of the two candidates tr + disc and tr - disc, with disc = sqrt(tr^2 - 4 det), the one
# of larger magnitude gives the first eigenvalue e1; the second follows from
# e1 * e2 = det. If that candidate is exactly zero both eigenvalues are set to zero.
# Real and imaginary parts are stored in state.e1[1:2] and state.e2[1:2]; the function
# returns the pair complex(e1[1], e1[2]), complex(e2[1], e2[2]).
function eigen_values{T}(state::ShiftType{T})
    A = state.A
    a11, a12 = A[1,1], A[1,2]
    a21, a22 = A[2,1], A[2,2]

    tr = a11 + a22
    detm = a11 * a22 - a21 * a12
    # Integer literals are used (not 4.0 and 2.0) so that the element type of state.A
    # is kept: a Float32-based block is no longer promoted to Float64 here. For
    # Float64 the results are unchanged.
    disc = sqrt(tr * tr - 4 * detm)

    u = abs(tr + disc) > abs(tr - disc) ? tr + disc : tr - disc
    if iszero(u)
        state.e1[1], state.e1[2] = zero(T), zero(T)
        state.e2[1], state.e2[2] = zero(T), zero(T)
    else
        e1 = u / 2
        e2 = detm / e1
        state.e1[1], state.e1[2] = real(e1), imag(e1)
        state.e2[1], state.e2[2] = real(e2), imag(e2)
    end

    return complex(state.e1[1], state.e1[2]), complex(state.e2[1], state.e2[2])
end
350 | 
351 | 
352 | ##################################################
353 | ## Deflation
## When a Q[k] matrix becomes a "D" matrix, we deflate. This is detected by its sine
## term (second value of `vals`) being basically 0, i.e. at most `tol` in magnitude.
## Q is scanned from stop_index down to start_index; only the first hit is deflated.
function check_deflation{T}(state::ShiftType{T}, tol = eps(T))
    lo, hi = state.ctrs.start_index, state.ctrs.stop_index
    for k in hi:-1:lo
        s = vals(state.Q[k])[2]
        abs(s) <= tol || continue
        deflate(state, k)
        return
    end
end
363 | 
# Deflate at position k: Q[k] is made a "D" matrix by zeroing its sine term, the
# active window is moved to start right after k and the iteration counter is reset.
# Turn on `show_status` to view the sequence.
function deflate{T}(state::ShiftType{T}, k)
    rot = state.Q[k]

    # keep c, zero out s (the original note says `vals!` will renormalize c)
    c = vals(rot)[1]
    vals!(rot, c, zero(T))

    ctrs = state.ctrs
    # zero_index points to a Q[k] that is either RealRotator(-1, 0) or RealRotator(1, 0)
    ctrs.zero_index = k
    ctrs.start_index = k + 1

    # reset counter
    ctrs.it_count = 1
end
379 | 
380 | 
# Deflate at position k for the ComplexReal case. This differs from the generic
# method: Q[k] is replaced by the identity and its diagonal part Dk is moved into D.
#
# Dk is pushed past the rotators one at a time
#   Qi           Qi              Dk Qi
#     Qj     ->     Dk Qj    -->    Dk  Qj  and so on until we get to start_index
#       Dk Ik         Dk  Ik           Dk Ik
# and the Dk's are then collected into [alpha 0; I; 0 conj(alpha)] (start, k+1).
# The pushing and collecting is delegated to `cascade`.
function deflate{T}(state::ComplexRealSingleShift{T}, k)
    # remember the first value of Q[k], then make Q[k] the identity
    alpha = vals(state.Q[k])[1]
    vals!(state.Q[k], one(Complex{T}), zero(T))

    cascade(state.Q, state.D, alpha, k, state.ctrs.stop_index)

    ctrs = state.ctrs
    # shift zero counter: zero_index marks the deflated position
    ctrs.zero_index = k
    ctrs.start_index = k + 1

    # reset counter
    ctrs.it_count = 1
end
407 | 
408 | 
409 | ##################################################
410 | 
411 | 
412 | 
413 | 


--------------------------------------------------------------------------------
/src/bulge.jl:
--------------------------------------------------------------------------------
  1 | ## Bulge chasing
  2 | 
  3 | ## chase bulge from top to bottom until final absorbtion 
  4 | function bulge_step{T}(state::ShiftType{T})
  5 |     create_bulge(state)
  6 |     prepare_bulge(state)
  7 |     chase_bulge(state)
  8 |     absorb_bulge(state)
  9 | end
 10 | 
 11 | 
 12 | 
 13 | ## RealDoubleShift
 14 | ##
 15 | ## There are two rotators, U, V, to chase through the matrix using the following operations
 16 | ##                                   [         [
 17 | ## a unitary transform: basically U    [   -->   [     U; as we just hit both sides by U' A U and U' U is I
 18 | ##                                       [          [
 19 | ##
 20 | ##
 21 | ## A fuse   [ [ -> [
 22 | ##
 23 | ## A turnover    [   M -->   [     where M moves through a descending or ascending structure
 24 | ##                 [       M   [
 25 | ##
 26 | ## a "D" flip:  D    --->     D
 27 | ##                [        [
 28 | ##
 29 | 
 30 | 
 31 | ## The bulge is created by  (A-rho1) * (A - rho2) * e_1 where rho1 and rho2 are eigenvalue or random
 32 | function create_bulge{T}(state::RealDoubleShift{T})
 33 | 
 34 |     if mod(state.ctrs.it_count, 15) == 0
 35 |         
 36 |         t = rand() * pi
 37 |         re1, ie1 = cos(t), sin(t)
 38 |         re2, ie2 = re1, -ie1
 39 |         
 40 |         vals!(state.U, re1, ie1); idx!(state.U, state.ctrs.start_index)
 41 |         vals!(state.Ut, re1, -ie1); idx!(state.Ut, state.ctrs.start_index)
 42 |         
 43 |         vals!(state.V, re2, ie2); idx!(state.V, state.ctrs.start_index + 1)
 44 |         vals!(state.Vt, re2, -ie2); idx!(state.Vt, state.ctrs.start_index + 1)        
 45 |         
 46 |     else
 47 | 
 48 |         # compute (A-rho1) * (A - rho2) * e_1
 49 |         # find e1, e2
 50 | 
 51 |         flag = diagonal_block(state, state.ctrs.stop_index+1)
 52 |         eigen_values(state)        
 53 |         l1r, l1i = state.e1
 54 |         l2r, l2i =  state.e2
 55 | 
 56 |         # find first part of A[1:3, 1:2]
 57 |         Bk = state.Bk 
 58 |         flag = flag | diagonal_block(state,  state.ctrs.start_index+1)
 59 | 
 60 |         bk11, bk12 = state.A[1,1], state.A[1,2]
 61 |         bk21, bk22 = state.A[2,1], state.A[2,2]
 62 | 
 63 |         # find last part
 64 |         flag = flag | diagonal_block(state, state.ctrs.start_index+2)
 65 |         #        Bk[3,2] = state.A[2, 1]
 66 |         bk32 = state.A[2,1]
 67 | 
 68 |         # an issue... restart
 69 |         # if isnan(l1r) || isnan(l1i) || isnan(l2r) || isnan(l2i)
 70 |         #      ## eigvals gone awry
 71 |         #      restart(state)
 72 |         #      return create_bulge(state)
 73 |         # end
 74 | 
 75 |         
 76 |         
 77 | #        if !flag  # flag is false if there is an issue
 78 | #            restart(state)
 79 | #            return create_bulge(state)
 80 | #        end
 81 |         
 82 |         # make first three elements of c1,c2,c3
 83 |         # c1 = real(-l1i⋅l2i + ⅈ⋅l1i⋅l2r - ⅈ⋅l1i⋅t₁₁ + ⅈ⋅l1r⋅l2i + l1r⋅l2r - l1r⋅t₁₁ - ⅈ⋅l2i⋅t₁₁ - l2r⋅t₁₁ + t₁₁^2  + t₁₂⋅t₂₁)
 84 |         # c2 = real(-ⅈ⋅l1i⋅t₂₁ - l1r⋅t₂₁ - ⅈ⋅l2i⋅t₂₁ - l2r⋅t₂₁ + t₁₁⋅t₂₁ + t₂₁⋅t₂₂)
 85 |         # c3 = real(t₂₁⋅t₃₂)
 86 |         
 87 |         c1 = -l1i * l2i + l1r*l2r -l1r*bk11 -l2r * bk11 + bk11^2 + bk12 * bk21
 88 |         c2 = -l1r * bk21 - l2r * bk21 + bk11* bk21 + bk21 * bk22
 89 |         c3 = bk21 * bk32
 90 | 
 91 |         
 92 |         c, s, nrm = givensrot(c2, c3)
 93 |         j = state.ctrs.start_index + 1
 94 | 
 95 |         vals!(state.V, c, -s)
 96 |         idx!(state.V, j)
 97 | 
 98 |         c, s, tmp = givensrot(c1, nrm)
 99 | 
100 |         vals!(state.U, c, -s)
101 |         idx!(state.U, j-1)
102 |     end
103 | 
104 | end
105 | 
106 | ## make W on left side
107 | #
108 | # initial            Q0
109 | # we do turnover U1'     Q1     -->    U1'        -->    U1'          -->    Q1
110 | #                    V1'    Q2      Q1     V1' Q2     Q1     (V1'Q2)       W1  Q2
111 | #
112 | # With this, W will be kept on the left side until the last step, U,V
113 | # move through left to right by one step, right to left by unitariness
114 | #
115 | #      Q0                       Q0                     Q0               Q0
116 | #  U1'     Q1        U1'    Q1*         ->         U1            -->       U1*
117 | #      V1'    Q3 ->     V1'         Q3      Q1**      V1'   Q3        W          (V1'Q3)
118 | #
119 | # Q0 is (p,0) rotator, p 1 or -1. We have
120 | #    Q0  --> Q0
121 | #  R             (r, pr2)
# Prepare the real double-shift bulge: build Ut, Vt from U, V, make W from Q[start]
# and perform the turnover/fuse sequence sketched in the diagram before this function.
function prepare_bulge{T}(state::RealDoubleShift{T})
    top = state.ctrs.start_index
    U, V = state.U, state.V
    Ut, Vt, W = state.Ut, state.Vt, state.W

    # Ut, Vt: same cosine and index as U, V, with the sine negated
    vals!(Ut, U.c, -U.s); idx!(Ut, idx(U))
    vals!(Vt, V.c, -V.s); idx!(Vt, idx(V))

    # W starts as a copy of Q[top]. The rotator above it, Q0, is RR(1,0) or RR(-1,0)
    # (a zero index implies this); p is its sign and is taken to be 1 at the very top.
    copy!(W, state.Q[top])
    if top == 1
        p = one(T)
    else
        p = state.Q[top-1].c
    end
    dflip(W, p)

    turnover(Ut, Vt, W, Val{:right})
    fuse(Vt, state.Q[top+1], Val{:right})    # V' Q3
    dflip(Ut, p)
    vals!(state.Q[top], Ut.c, Ut.s)
end
143 | 
 144 | ## Bulge chasing moves U, V from R to L through B then C then Q where an interaction with W allows
145 | ## a subsequent unitary operation to move U,V back to the right, one step down
146 | ## The case when Ct[i] and B[i] are identical allow a speed up.
147 | 
# One chase step for the case where Ct[i] and B[i] are identical: only B is passed
# through with turnovers and Ct is then rebuilt from B, which saves turnovers.
function one_bulge_chase_shortcut{T}(state::RealDoubleShift{T})
    B, Ct, Q = state.B, state.Ct, state.Q
    i = idx(state.V)

    # Vt, Ut are borrowed to hold copies of V, U for the pass through B
    copy!(state.Vt, state.V)
    copy!(state.Ut, state.U)
    turnover(B[i],   B[i+1], state.Vt, Val{:right})
    turnover(B[i-1], B[i],   state.Ut, Val{:right})

    # refresh Ct[i-1:i+1] from B: same first value, negated second value
    # (the original notes that copy!(Ct, B') is slower)
    for m in (i-1):(i+1)
        a, b = vals(B[m])
        vals!(Ct[m], a, -b)
    end

    turnover(Q[i],    Q[i+1],  state.V, Val{:right})
    turnover(Q[i-1],  Q[i],    state.U, Val{:right})
    turnover(state.W, state.V, state.U, Val{:left})
end
166 | 
# One full chase step: V and then U are passed through B and Ct, both are passed
# through Q, and a final turnover involves W, V and U.
function one_bulge_chase{T}(state::RealDoubleShift{T})
    B, Ct, Q = state.B, state.Ct, state.Q
    U, V = state.U, state.V

    # both positions are fixed before any turnover is applied
    vpos = idx(V)
    upos = vpos - 1

    turnover(B[vpos],    B[vpos+1], V, Val{:right})
    turnover(Ct[vpos+1], Ct[vpos],  V, Val{:right})

    turnover(B[upos],    B[upos+1], U, Val{:right})
    turnover(Ct[upos+1], Ct[upos],  U, Val{:right})

    turnover(Q[vpos],    Q[vpos+1], V, Val{:right})
    turnover(Q[upos],    Q[upos+1], U, Val{:right})
    turnover(state.W,    V,         U, Val{:left})
end
182 | 
183 | 
# Chase the bulge down, one level per step, from the current position of V up to
# stop_index - 1.
function chase_bulge{T}(state::RealDoubleShift{T})
    ctrs = state.ctrs

    ## While the level is at most ctrs.tr, C_i = B_i. This happens in the early steps;
    ## the shortcut then means fewer turnovers, but at a price of more allocations.
    for level in idx(state.V):(ctrs.stop_index - 1)
        if level <= ctrs.tr
            one_bulge_chase_shortcut(state)
        else
            one_bulge_chase(state)
        end
    end
end
208 | 
209 | 
## The bulge is absorbed by moving V through, then U going through in two trips.
function absorb_bulge{T}(state::RealDoubleShift{T})
    B, Ct, Q = state.B, state.Ct, state.Q
    U, V, W = state.U, state.V, state.W

    # 1. V goes through B and C, then fuses with Q.
    i = idx(V)
    turnover(B[i],    B[i+1], V, Val{:right})
    turnover(Ct[i+1], Ct[i],  V, Val{:right})
    # Q[i+1] may be a diagonal rotator such as RR(-1,0); dflip is applied with its
    # value p before fusing.
    dflip(V, getd(Q[i+1]))
    fuse(Q[i], V, Val{:left})             # fuse Q*V -> Q

    # 2. U is brought through B, C and Q to fuse with W.
    j = idx(U)
    turnover(B[j],    B[j+1], U)
    turnover(Ct[j+1], Ct[j],  U)
    turnover(Q[j],    Q[j+1], U)
    fuse(W, U, Val{:right})

    # 3. Similarity transformation: U is brought through B and C again, then fuses
    #    with Q.
    j = idx(U)
    turnover(B[j],    B[j+1], U, Val{:right})
    turnover(Ct[j+1], Ct[j],  U)
    dflip(U, getd(Q[j+1]))
    fuse(Q[j], U, Val{:left})
end
257 | 
258 | 
259 | 
260 | ##################################################
261 | ## ComplexRealSingleShift
262 | 
# Create the single-shift bulge rotator U (and Ut) at start_index.
# The shift is random on every 15th iteration (it_count); otherwise it is derived from
# the trailing 2x2 block: with `state.ray` set, the eigenvalue of that block closest to
# its (2,2) entry, else the (2,2) entry itself.
function create_bulge{T}(state::ComplexRealSingleShift{T})
    ctrs = state.ctrs

    if mod(ctrs.it_count, 15) == 0
        theta = rand() * pi
        shift = complex(cos(theta), state.ray ? sin(theta) : zero(T))
    else
        diagonal_block(state, ctrs.stop_index + 1)
        a22 = state.A[2,2]
        if state.ray
            e1, e2 = eigen_values(state)
            shift = norm(a22 - e1) < norm(a22 - e2) ? e1 : e2
        else
            shift = a22
        end
    end

    # rotation built from the first column of the leading block, shifted
    diagonal_block(state, ctrs.start_index + 1)
    c, s, nrm = givensrot(state.A[1,1] - shift, state.A[2,1])

    # U is the inverse of what we just found; Ut holds the rotation itself
    vals!(state.U, conj(c), -s)
    idx!(state.U, ctrs.start_index)
    vals!(state.Ut, c, s)
    idx!(state.Ut, idx(state.U))
    return nothing
end
296 |         
297 |   
298 | ##
299 | ## U Qi            (fUQi) Di          (fUQi)   Di           (fUQi)    
300 | ##      Qj    -->          Qj   -->         Qj   Dj    -->        Qj     D  
301 | ##         Qk                Qk                Qk  Dk                Qk 
# Fuse Ut into Q[i] and hand the value returned by `fuse` on to `cascade`, which
# moves it into D (see the diagram before this function).
function prepare_bulge{T}(state::ComplexRealSingleShift{T})
    pos = idx(state.Ut)

    # When deflating in this case Q[pos-1] is left as an identity matrix, so no dflip
    # is needed here.
    phase = fuse(state.Ut, state.Q[pos], Val{:right})
    return cascade(state.Q, state.D, phase, pos, state.ctrs.stop_index)
end
312 | 
313 | ##
# One chase step when Ct can be rebuilt from B.
## Savings are one fewer turnover, at the cost of a few copies.
function one_bulge_chase_shortcut{T}(state::ComplexRealSingleShift{T})
    B, Ct, Q = state.B, state.Ct, state.Q
    D, Dp = state.D, state.Dp
    U = state.U
    i = idx(U)

    # a copy of U (held in Ut) goes through B; Ct[i:i+1] is then refreshed from B
    copy!(state.Ut, U)
    turnover(B[i], B[i+1], state.Ut, Val{:right})
    for m in i:(i+1)
        a, b = vals(B[m])
        vals!(Ct[m], conj(a), -b)    # the original notes copy!(Ct, B') is slower
    end

    # pass U through D[i:i+1], using the buffer Dp instead of a view
    Dp[1], Dp[2] = D[i], D[i+1]
    passthrough(Dp, U)
    D[i], D[i+1] = Dp[1], Dp[2]

    turnover(Q[i], Q[i+1], U, Val{:right})
end
335 | 
# Moving QCBU_i -> QC(BUi) -> QCUi1B -> Q(CUi1)B -> QUiCB -> Ui1 Q C B
# (the first turnovers could be consolidated)
function one_bulge_chase{T}(state::ComplexRealSingleShift{T})
    B, Ct, Q = state.B, state.Ct, state.Q
    D, Dp = state.D, state.Dp
    U = state.U
    i = idx(U)

    turnover(B[i],    B[i+1], U, Val{:right})
    turnover(Ct[i+1], Ct[i],  U, Val{:right})

    # pass U through D[i:i+1]; the buffer Dp is used because a view allocates
    Dp[1], Dp[2] = D[i], D[i+1]
    passthrough(Dp, U)
    D[i], D[i+1] = Dp[1], Dp[2]

    turnover(Q[i],    Q[i+1], U, Val{:right})
end
349 | 
# Q Ct B D U -> Q Ct B (D U) -> Q Ct (B U) D -> Q (Ct U) B D ->
# (Q U) Ct B D -> U Q Ct B D  then wrap via unitary operation
#
# Repeats this one level at a time, from the current position of U up to
# stop_index - 1.
function chase_bulge{T}(state::ComplexRealSingleShift{T})
    ctrs = state.ctrs

    ## While the level is at most ctrs.tr, C_i = B_i. This happens in the early steps;
    ## the shortcut then means fewer turnovers, but at a price of more allocations.
    for level in idx(state.U):(ctrs.stop_index - 1)
        if level <= ctrs.tr
            one_bulge_chase_shortcut(state)
        else
            one_bulge_chase(state)
        end
    end
end
370 | 
##
## Absorb the bulge:
##  - pass through Ct*B with two turnovers
##  - pass through D
##  - fuse Q & U
##  - absorb the phase into D
function absorb_bulge{T}(state::ComplexRealSingleShift{T})
    D, Dp = state.D, state.Dp
    U = state.U
    i = idx(U)

    turnover(state.B[i],    state.B[i+1], U, Val{:right})
    turnover(state.Ct[i+1], state.Ct[i],  U, Val{:right})

    # pass U through D[i:i+1], using the buffer Dp instead of a view
    Dp[1], Dp[2] = D[i], D[i+1]
    passthrough(Dp, U)
    D[i], D[i+1] = Dp[1], Dp[2]

    # fuse, and take care of the new alpha by moving it into D
    alpha = fuse(state.Q[i], U, Val{:left})
    D[i] *= alpha
    D[i+1] *= conj(alpha)
end
396 | 
397 | ##################################################
398 | 
399 | 
400 | ##################################################
401 | 
402 | ## ComplexComplexSingleShift 
# Create the single-shift bulge rotator U (and Ut) at start_index.
# The shift is random on every 15th iteration (it_count); otherwise it is derived from
# the trailing 2x2 block: with `state.ray` set, the eigenvalue of that block closest to
# its (2,2) entry, else the (2,2) entry itself.
function create_bulge{T}(state::ComplexComplexSingleShift{T})
    ctrs = state.ctrs

    if mod(ctrs.it_count, 15) == 0
        theta = rand() * pi
        shift = complex(cos(theta), state.ray ? sin(theta) : zero(T))
    else
        diagonal_block(state, ctrs.stop_index + 1)
        a22 = state.A[2,2]
        if state.ray
            e1, e2 = eigen_values(state)
            shift = norm(a22 - e1) < norm(a22 - e2) ? e1 : e2
        else
            shift = a22
        end
    end

    # rotation built from the first column of the leading block, shifted
    diagonal_block(state, ctrs.start_index + 1)
    c, s, nrm = givensrot(state.A[1,1] - shift, state.A[2,1])

    # U is the inverse of what we just found; Ut holds the rotation itself
    vals!(state.U, conj(c), -s)
    idx!(state.U, ctrs.start_index)
    vals!(state.Ut, c, s)
    idx!(state.Ut, idx(state.U))
    return nothing
end
435 |         
436 |   
437 | 
438 | ##
439 | ##    D        D           D
440 | ## U'    Q -->   V Q -->     (VQ) 
441 | ##
442 | ## with D = D(alpha); U' = (u1, v1); V = (u1 conj(alpha), v1)
# Fuse Ut into Q[i], first applying Dflip with the rotator above it when there is one
# (see the diagram before this function).
function prepare_bulge{T}(state::ComplexComplexSingleShift{T})
    pos = idx(state.Ut)

    # If previously deflated, the prior rotator is only diagonal, so there may be
    # trouble passing Ut to Q[pos] without this step.
    pos > 1 && Dflip(state.Ut, state.Q[pos-1])

    return fuse(state.Ut, state.Q[pos], Val{:right})
end
452 | 
453 | ##
## XXX a speed-up belongs here: only a turnover through B, not C, is needed, which
## would save one turnover and a few copies. For now this is the full chase step.
one_bulge_chase_shortcut{T}(state::ComplexComplexSingleShift{T}) = one_bulge_chase(state)
# Moving QCBU_i -> QC(BUi) -> QCUi1B -> Q(CUi1)B -> QUiCB -> Ui1 Q C B
function one_bulge_chase{T}(state::ComplexComplexSingleShift{T})
    U = state.U
    pos = idx(U)    # fixed before the turnovers are applied

    turnover(state.B[pos],    state.B[pos+1], U, Val{:right})
    turnover(state.Ct[pos+1], state.Ct[pos],  U, Val{:right})
    turnover(state.Q[pos],    state.Q[pos+1], U, Val{:right})
end
466 | 
# Q Ct B D U -> Q Ct B (D U) -> Q Ct (B U) D -> Q (Ct U) B D ->
# (Q U) Ct B D -> U Q Ct B D  then wrap via unitary operation
#
# Repeats this one level at a time, from the current position of U up to
# stop_index - 1.
function chase_bulge{T}(state::ComplexComplexSingleShift{T})
    ctrs = state.ctrs

    ## While the level is at most ctrs.tr, C_i = B_i. This happens in the early steps;
    ## the shortcut then means fewer turnovers, but at a price of more allocations.
    for level in idx(state.U):(ctrs.stop_index - 1)
        if level <= ctrs.tr
            one_bulge_chase_shortcut(state)
        else
            one_bulge_chase(state)
        end
    end
end
488 | 
# We have Q Ct B D U -> Q Ct B (U D) -> Q Ct (B U) D -> Q (Ct U) B D ->
# (Q U) Ct B D -> Q Di Ct B D -> Q (Di Ct) B D -> Q Ct (Di B) D -> Q Ct B (Di D)
# In short: Q C B (Ui) -> Q Ui C B -> (Q Ui) C B
function absorb_bulge{T}(state::ComplexComplexSingleShift{T})
    U = state.U
    pos = idx(U)

    # U goes through B and then Ct
    turnover(state.B[pos],    state.B[pos+1], U, Val{:right})
    turnover(state.Ct[pos+1], state.Ct[pos],  U, Val{:right})

    # apply Dflip with the rotator below, when there is one, then fuse U into Q[pos]
    if pos < state.N
        Dflip(U, state.Q[pos+1])
    end
    return fuse(state.Q[pos], U, Val{:left})
end
501 | 
502 | ##################################################
503 | 
504 | 


--------------------------------------------------------------------------------