├── REQUIRE ├── .codecov.yml ├── .gitignore ├── src ├── AMVW.jl ├── utils.jl ├── diagnostics.jl ├── AMVW_algorithm.jl ├── transformations.jl ├── types.jl ├── factorization.jl └── bulge.jl ├── README.md ├── .travis.yml ├── test.jl ├── appveyor.yml ├── LICENSE.md └── test ├── runtests.jl └── benchmark.jl /REQUIRE: -------------------------------------------------------------------------------- 1 | julia 0.5 2 | -------------------------------------------------------------------------------- /.codecov.yml: -------------------------------------------------------------------------------- 1 | comment: false 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.jl.cov 2 | *.jl.*.cov 3 | *.jl.mem 4 | -------------------------------------------------------------------------------- /src/AMVW.jl: -------------------------------------------------------------------------------- 1 | #__precompile__(true) 2 | module AMVW 3 | 4 | using Compat 5 | # package code goes here 6 | 7 | 8 | include("types.jl") 9 | include("utils.jl") 10 | include("transformations.jl") 11 | include("bulge.jl") 12 | include("factorization.jl") 13 | include("diagnostics.jl") 14 | include("AMVW_algorithm.jl") 15 | 16 | 17 | end # module 18 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # AMVW 2 | 3 | [![Build Status](https://travis-ci.org/jverzani/AMVW.jl.svg?branch=master)](https://travis-ci.org/jverzani/AMVW.jl) 4 | 5 | [![Coverage Status](https://coveralls.io/repos/jverzani/AMVW.jl/badge.svg?branch=master&service=github)](https://coveralls.io/github/jverzani/AMVW.jl?branch=master) 6 | 7 | [![codecov.io](http://codecov.io/github/jverzani/AMVW.jl/coverage.svg?branch=master)](http://codecov.io/github/jverzani/AMVW.jl?branch=master) 8 | 9 | 10 | 11 | 
An implementation of the main algorithm in "Fast and backward stable computation of roots of polynomials" by Aurentz, Mach, Vandebril, and Watkins. 12 | 13 | Follows the accompanying code, which has an unclear license. 14 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | # Documentation: http://docs.travis-ci.com/user/languages/julia/ 2 | language: julia 3 | os: 4 | - linux 5 | - osx 6 | julia: 7 | - release 8 | - nightly 9 | notifications: 10 | email: false 11 | # uncomment the following lines to override the default test script 12 | #script: 13 | # - if [[ -a .git/shallow ]]; then git fetch --unshallow; fi 14 | # - julia -e 'Pkg.clone(pwd()); Pkg.build("AMVW"); Pkg.test("AMVW"; coverage=true)' 15 | after_success: 16 | # push coverage results to Coveralls 17 | - julia -e 'cd(Pkg.dir("AMVW")); Pkg.add("Coverage"); using Coverage; Coveralls.submit(Coveralls.process_folder())' 18 | # push coverage results to Codecov 19 | - julia -e 'cd(Pkg.dir("AMVW")); Pkg.add("Coverage"); using Coverage; Codecov.submit(Codecov.process_folder())' 20 | -------------------------------------------------------------------------------- /test.jl: -------------------------------------------------------------------------------- 1 | 2 | ## Drivers...
3 | 4 | using Polynomials 5 | ## ps a poly with coefficients p0 + p1x +p2x^2 + p3x^3 + p4x^4 [p0, p1, ..., p4] 6 | damvw(p::Poly) = damvw(p.a) 7 | 8 | function poly_roots(p::Poly) 9 | state = damvw(p.a) 10 | AMVW(state) 11 | poly_roots(state) 12 | end 13 | 14 | 15 | using Base.Profile 16 | using Polynomials 17 | T = Float64 18 | #T = BigFloat 19 | x = variable(T) 20 | p = prod(x - i/10 for i in 1:20) 21 | #p = prod(x^2 + i for i in 1:5) 22 | #p = poly(linspace(.1,1,20)) 23 | state = damvw(p) 24 | AMVW.AMVW(state) 25 | 26 | 27 | 28 | # ## warmed up 29 | 30 | n = 5 31 | p = poly(linspace(.5,1,n)) 32 | println(n) 33 | state = damvw(p) 34 | @time DAMVW.AMVW(state) 35 | 36 | Profile.clear() 37 | p = poly(linspace(.5,1,n)) 38 | state = damvw(p) 39 | @profile DAMVW.AMVW(state) 40 | Profile.print(format=:flat, sortedby=:count) 41 | 42 | # n = 10 43 | # as = zeros(5n) 44 | # for i in 1:5n 45 | # println("doing $i") 46 | # p = poly(linspace(.2, 1.0, i+2)) 47 | # state = damvw(p) 48 | # a = time(); DAMVW.AMVW(state); b = time() - a 49 | # as[i] = b 50 | # end 51 | -------------------------------------------------------------------------------- /appveyor.yml: -------------------------------------------------------------------------------- 1 | environment: 2 | matrix: 3 | - JULIAVERSION: "julialang/bin/winnt/x86/0.5/julia-0.5-latest-win32.exe" 4 | - JULIAVERSION: "julialang/bin/winnt/x64/0.5/julia-0.5-latest-win64.exe" 5 | - JULIAVERSION: "julianightlies/bin/winnt/x86/julia-latest-win32.exe" 6 | - JULIAVERSION: "julianightlies/bin/winnt/x64/julia-latest-win64.exe" 7 | 8 | branches: 9 | only: 10 | - master 11 | - /release-.*/ 12 | 13 | notifications: 14 | - provider: Email 15 | on_build_success: false 16 | on_build_failure: false 17 | on_build_status_changed: false 18 | 19 | install: 20 | # Download most recent Julia Windows binary 21 | - ps: (new-object net.webclient).DownloadFile( 22 | $("http://s3.amazonaws.com/"+$env:JULIAVERSION), 23 | "C:\projects\julia-binary.exe") 24 | # 
Run installer silently, output to C:\projects\julia 25 | - C:\projects\julia-binary.exe /S /D=C:\projects\julia 26 | 27 | build_script: 28 | # Need to convert from shallow to complete for Pkg.clone to work 29 | - IF EXIST .git\shallow (git fetch --unshallow) 30 | - C:\projects\julia\bin\julia -e "versioninfo(); 31 | Pkg.clone(pwd(), \"AMVW\"); Pkg.build(\"AMVW\")" 32 | 33 | test_script: 34 | - C:\projects\julia\bin\julia -e "Pkg.test(\"AMVW\")" 35 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | 2 | This is not the proper license, though it would be nice 3 | 4 | ## The AMVW.jl package is licensed under the MIT "Expat" License: 5 | 6 | > Copyright (c) 2017: XXX 7 | > 8 | > Permission is hereby granted, free of charge, to any person obtaining a copy 9 | > of this software and associated documentation files (the "Software"), to deal 10 | > in the Software without restriction, including without limitation the rights 11 | > to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | > copies of the Software, and to permit persons to whom the Software is 13 | > furnished to do so, subject to the following conditions: 14 | > 15 | > The above copyright notice and this permission notice shall be included in all 16 | > copies or substantial portions of the Software. 17 | > 18 | > THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | > IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | > FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | > AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | > LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | > OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | > SOFTWARE. 
25 | > 26 | -------------------------------------------------------------------------------- /src/utils.jl: -------------------------------------------------------------------------------- 1 | 2 | ## take poly [p0, p1, ..., pn] and return 3 | ## [q_m-1, q_m-2, ..., q0], k 4 | ## where we trim of k roots of 0, and then make p monic, then reverese 5 | ## monomial x^5 6 | function reverse_poly{T}(ps::Vector{T}) 7 | ## trim any 0s from the end of ps 8 | N = findlast(!iszero, ps) 9 | K = findfirst(!iszero, ps) 10 | 11 | N == 0 && return(zeros(T,0), length(ps)) 12 | ps = ps[K:N] 13 | 14 | 15 | qs = reverse(ps./ps[end])[2:end] 16 | qs, K-1 17 | end 18 | 19 | # 20 | function quadratic_equation{T <: Real}(a::T, b::T, c::T) 21 | qdrtc(a, -(0.5)*b, c) 22 | end 23 | 24 | ## make more robust 25 | function quadratic_equation{T}(a::Complex{T}, b::Complex{T}, c::Complex{T}) 26 | d = sqrt(b^2 - 4*a*c) 27 | e1 = (-b + d)/(2a); e2 = (-b-d)/(2a) 28 | return (real(e1), imag(e1), real(e2), imag(e2)) 29 | 30 | end 31 | 32 | ## Kahan quadratic equation with fma 33 | ## https://people.eecs.berkeley.edu/~wkahan/Qdrtcs.pdf 34 | 35 | ## solve ax^2 - 2bx + c 36 | function qdrtc{T <: Real}(a::T, b::T, c::T) 37 | # z1, z2 roots of ax^2 - 2bx + c 38 | d = discr(a,b,c) # (b^2 - a*c), as 2 removes 4 39 | 40 | if d <= 0 41 | r = b/a # real 42 | s = sqrt(-d)/a #imag 43 | return (r,s,r,-s) 44 | else 45 | r = sqrt(d) * (sign(b) + iszero(b)) + b 46 | return (r/a, zero(T), c/r, zero(T)) 47 | end 48 | end 49 | 50 | ## more work could be done here. 51 | function discr{T}(a::T,b::T,c::T) 52 | pie = 3.0 # depends on 53 or 64 bit... 
53 | d = b*b - a*c 54 | e = b*b + a*c 55 | 56 | pie*abs(d) > e && return d 57 | 58 | p = b*b 59 | dp = muladd(b,b,-p) 60 | q = a*c 61 | dq = muladd(a,c,-q) 62 | 63 | (p-q) + (dp - dq) 64 | end 65 | 66 | ## Solve the degree 2 or less case directly, storing the roots in state.REIGS / state.IEIGS 67 | # (size 0 sets state.FLAG = -1, size 1 sets state.FLAG = 0; the size 2 branch uses real comparisons) 68 | ## XXX: complex values are not handled 69 | function solve_simple_cases(state) 70 | N = state.N # problem size read from the state (presumably the polynomial degree) 71 | if N == 0 72 | state.FLAG = -1 73 | return 74 | elseif N == 1 75 | state.FLAG = 0 76 | N == 1 && (state.REIGS[1] = -state.POLY[1]) 77 | return 78 | elseif N == 2 79 | # quadratic formula 80 | c,b,a = state.POLY[1], state.POLY[2], 1.0 81 | 82 | tr = -b 83 | disc = b^2 - 4.0*c 84 | 85 | if disc < 0 86 | state.REIGS[1] = -b/2.0 87 | state.IEIGS[1] = sqrt(-disc)/2.0 88 | state.REIGS[2] = state.REIGS[1] 89 | state.IEIGS[2] = -state.IEIGS[1] 90 | else 91 | u,v = tr + sqrt(disc), tr - sqrt(disc) 92 | if abs(u) < abs(v) 93 | u,v = v, u 94 | end 95 | if u == 0 96 | ## nothing to do 97 | else 98 | state.REIGS[1] = u/2.0 99 | state.REIGS[2] = c/state.REIGS[1] 100 | end 101 | end 102 | end 103 | end 104 | 105 | -------------------------------------------------------------------------------- /src/diagnostics.jl: -------------------------------------------------------------------------------- 1 | ## Diagonostic code 2 | ## 3 | 4 | ## make a rotator into a full matrix 5 | 6 | function as_full{T}(a::Rotator{T}, N::Int) 7 | c,s = vals(a) 8 | i = idx(a) 9 | i < N || error("too big") 10 | A = eye(Complex{T}, N) 11 | A[i:i+1, i:i+1] = [c -conj(s); s conj(c)] 12 | A 13 | end 14 | 15 | 16 | 17 | function zero_out!{T}(A::Array{T}, tol=1e-12) 18 | A[norm.(A) .<= tol] = zero(T) 19 | end 20 | function zero_out!{T}(A::Array{Complex{T}}, tol=1e-12) 21 | for i in eachindex(A) 22 | c = A[i] 23 | cr, ci = real(c), imag(c) 24 | if abs(cr) < tol 25 | cr = zero(T) 26 | end 27 | if abs(ci) < tol 28 | ci = zero(T) 29 | end 30 | A[i] = complex(cr, ci) 31 | end 32 | end 33 | 34 | ## diagnostic 35 | 36 | ## create Full matrix from state object.
For diagnostic purposes. 37 | # we may or may not have a diagonal matrix to keep track or 38 | D_matrix{T}(state::ComplexRealSingleShift{T}) = diagm(state.D) 39 | D_matrix(state::ShiftType) = I 40 | 41 | #function Base.full{T}(state::ComplexRealSingleShift{T}, what=:A) 42 | function Base.full{T}(state::ShiftType{T}, what=:A) 43 | N = state.N 44 | Q = as_full(state.Q[1],N+1); for i in 2:N Q = Q * as_full(state.Q[i],N+1) end 45 | Ct = as_full(state.Ct[1], N+1); for i in 2:N Ct = as_full(state.Ct[i],N+1)*Ct end 46 | B = as_full(state.B[1],N+1); for i in 2:N B = B * as_full(state.B[i],N+1) end 47 | D = D_matrix(state) 48 | 49 | 50 | # x = -vcat(state.POLY[2:state.N], state.POLY[1], 1) 51 | par = iseven(state.N) ? one(T) : -one(T) 52 | x = -vcat(state.POLY[state.N-1:-1:1], -par * state.POLY[state.N], par * 1) 53 | alpha = norm(x) 54 | e1 = zeros(T, state.N+1); e1[1]=one(T) 55 | en = zeros(T, state.N+1); en[N] = one(T) 56 | en1 = zeros(T, state.N+1); en1[N+1] = one(T) 57 | 58 | rho = transpose(en1) * Ct * e1 # scalar 59 | yt = -1/rho * transpose(en1) * Ct * B 60 | # clean 61 | for i in eachindex(yt) 62 | if norm(yt[i]) < 1e-12 63 | yt[i] = 0 64 | end 65 | end 66 | 67 | ## we have R = Z + x = Ct * (B * D + e1 * yt) 68 | Z = Ct * B 69 | zero_out!(Z) 70 | 71 | x = Ct * e1 * yt 72 | 73 | R = D*(Z + x) 74 | zero_out!(R) 75 | what == :R && return R 76 | 77 | A = Q * R 78 | zero_out!(A) 79 | A 80 | end 81 | 82 | 83 | # simple graphic to show march of algorithm 84 | function show_status{T}(state::ShiftType{T}) 85 | qs = [norm(u.s) for u in state.Q[state.ctrs.start_index:state.ctrs.stop_index]] 86 | minq = length(qs) > 0 ? 
minimum(qs) : 0.0 87 | 88 | 89 | x = fill(".", state.N+2) 90 | x[state.ctrs.zero_index+1] = "α" 91 | x[state.ctrs.start_index+1] = "x" 92 | x[state.ctrs.stop_index+2] = "Δ" 93 | println(join(x, ""), " ($minq)") 94 | end 95 | 96 | ## create a rotation matrix 97 | function rotm{T}(a::T,b, i, N) 98 | r = eye(T, N) 99 | r[i:i+1, i:i+1] = [a -conj(b); b conj(a)] 100 | r 101 | end 102 | 103 | -------------------------------------------------------------------------------- /src/AMVW_algorithm.jl: -------------------------------------------------------------------------------- 1 | 2 | ## Main algorithm of AMV&W 3 | ## This follows that given in the paper very closely 4 | function AMVW_algorithm{T}(state::ShiftType{T}) 5 | 6 | 7 | it_max = 60 * state.N 8 | kk = 0 9 | 10 | while kk <= it_max 11 | 12 | ## finished up! 13 | state.ctrs.stop_index <= 0 && return 14 | 15 | check_deflation(state) 16 | kk += 1 17 | 18 | ## show_status(state) 19 | 20 | k = state.ctrs.stop_index 21 | 22 | if state.ctrs.stop_index - state.ctrs.zero_index >= 2 23 | 24 | bulge_step(state) 25 | state.ctrs.it_count += 1 26 | state.ctrs.tr -= 2 27 | 28 | elseif state.ctrs.stop_index - state.ctrs.zero_index == 1 29 | 30 | diagonal_block(state, k + 1) 31 | eigen_values(state) 32 | 33 | 34 | state.REIGS[k], state.IEIGS[k] = state.e2 35 | state.REIGS[k+1], state.IEIGS[k+1] = state.e1 36 | 37 | if k > 1 38 | diagonal_block(state, k) 39 | eigen_values(state) 40 | end 41 | 42 | diagonal_block(state, 2) 43 | 44 | if state.ctrs.stop_index == 2 45 | diagonal_block(state, 2) 46 | e1 = state.A[1,1] 47 | state.REIGS[1] = real(e1) 48 | state.IEIGS[1] = imag(e1) 49 | end 50 | state.ctrs.zero_index = 0 51 | state.ctrs.start_index = 1 52 | state.ctrs.stop_index = state.ctrs.stop_index - 2 53 | 54 | elseif state.ctrs.stop_index - state.ctrs.zero_index == 0 55 | 56 | diagonal_block(state, state.ctrs.stop_index + 1) 57 | e1, e2 = state.A[1,1], state.A[2,2] 58 | 59 | if state.ctrs.stop_index == 1 60 | state.REIGS[1], 
state.IEIGS[1] = real(e1), imag(e1) 61 | state.REIGS[2], state.IEIGS[2] = real(e2), imag(e2) 62 | state.ctrs.stop_index = 0 63 | else 64 | state.REIGS[k+1], state.IEIGS[k+1] = real(e2), imag(e2) 65 | state.ctrs.zero_index = 0 66 | state.ctrs.start_index = 1 67 | state.ctrs.stop_index = k - 1 68 | end 69 | end 70 | end 71 | 72 | warn("Not all roots were found. The first $(state.ctrs.stop_index-1) are missing.") 73 | end 74 | 75 | ## qs is [p_{n-1}, p_{n-2}, ..., p_1, p_0] for 76 | ## monic poly x^n + p_{n-1}x^{n-1} + ... + p_1 x + p_0 77 | ## returns RealDoubleShift object 78 | function amvw{T <: Real}(qs::Vector{T}) 79 | state = RealDoubleShift(qs) 80 | init_state(state) 81 | AMVW_algorithm(state) 82 | state 83 | end 84 | 85 | function amvw{T <: Real}(qs::Vector{Complex{T}}) 86 | # state = ComplexSingleShift(qs) 87 | state = ComplexRealSingleShift(qs) 88 | init_state(state) 89 | AMVW_algorithm(state) 90 | state 91 | end 92 | 93 | """ 94 | Use the AMVW algorithm (`RealDoubleShift` for real coefficients, `ComplexRealSingleShift` for complex ones) to find the roots 95 | of the polynomial p_0 + p_1 x + p_2 x^2 + ... + p_n x^n encoded as 96 | `[p_0, p_1, ..., p_n]` (the same ordering used by `Polynomials`). 97 | 98 | Returns a vector of complex roots; roots at zero are split off by `reverse_poly` and appended as trailing zeros. After that split, degree 2 or less is solved directly. 99 | 100 | Example:
101 | ``` 102 | using Polynomials 103 | x = variable() 104 | p = prod(x - i/10 for i in 5:10) 105 | rts = AMVW.poly_roots(p.a) 106 | # rts is a vector of complex numbers, one entry per root 107 | ``` 108 | """ 109 | function poly_roots{T}(ps::AbstractVector{T}) 110 | ## roots of poly [p0, p1, ..., pn] 111 | qs, k = reverse_poly(ps) 112 | 113 | # k is number of 0 factors 114 | ## simple cases 115 | n = length(qs) 116 | if n == 0 117 | rts = complex.(zeros(k), zeros(k)) 118 | elseif n == 1 119 | as = vcat([-real(qs[1])], zeros(k)) 120 | bs = vcat([-imag(qs[1])], zeros(k)) 121 | rts = complex.(as, bs) 122 | elseif n == 2 123 | if T <: Real 124 | b,c = -(0.5)*qs[1], qs[2] 125 | e1r, e1i, e2r, e2i = qdrtc(one(T), b, c) 126 | else 127 | e1r, e1i, e2r, e2i = quadratic_equation(one(T), qs[1], qs[2]) 128 | end 129 | as = vcat([e1r, e2r], zeros(k)) 130 | bs = vcat([e1i, e2i], zeros(k)) 131 | rts = complex.(as, bs) 132 | else 133 | state = amvw(qs) 134 | as = vcat(state.REIGS, zeros(k)) 135 | bs = vcat(state.IEIGS, zeros(k)) 136 | rts = complex.(as, bs) 137 | end 138 | return rts 139 | end 140 | -------------------------------------------------------------------------------- /src/transformations.jl: -------------------------------------------------------------------------------- 1 | ## 2 | ################################################## 3 | 4 | """ 5 | rotations; find values 6 | Real Givens 7 | This subroutine computes c and s such that, 8 | 9 | [c -s] * [a, b] = [r,0]; c^2 + s^2 = 1 10 | 11 | and 12 | 13 | r = sqrt(|a|^2 + |b|^2).
14 | 15 | XXX seems faster to just return r, then not 16 | """ 17 | function givensrot{T <: Real}(a::T,b::T) 18 | iszero(b) && return (sign(a) * one(T), zero(T), abs(a)) 19 | iszero(a) && return(zero(T), -sign(b) * one(T), abs(b)) 20 | 21 | r = hypot(a,b) 22 | return(a/r,-b/r,r) 23 | end 24 | 25 | ## givens rotation 26 | ################################################## 27 | # Compute Givens rotation zeroing b 28 | # 29 | # G1 [ ar + i*ai ] = [ nrm ] 30 | # G1 [ b ] = [ ] 31 | # 32 | # all variables real (nrm complex) 33 | # returns (copmlex(cr, ci), s) with 34 | # u=complex(cr,ci), v=s; then [u -v; v conj(u)] * [complex(ar, ai), s] has 0 35 | # 36 | # XXX: Could hand write this if needed, here we just use `givens` with a flip 37 | # to get what we want, a real s part, not s part 38 | function givensrot{T <: Real}(a::Complex{T},b::Complex{T}) 39 | G, r = givens(b, a, 1, 2) 40 | G.s, -G.c, r 41 | end 42 | givensrot{T <: Real}(a::Complex{T},b::T) = givensrot(a, complex(b, zero(T))) 43 | 44 | 45 | #### Operations on [,[ terms 46 | 47 | ## The zero_index and stop_index+1 point at "D" matrices 48 | ## 49 | ## Let a D matrix be one of [1 0; 0 1] or [-1 0; 0 1] (D^2 = I). Then we have this move 50 | ## D ---> D (we update the rotator) 51 | ## [ [ 52 | ## 53 | ## this is `dflip` 54 | function dflip{T}(a::RealRotator{T}, d=one(T)) 55 | a.s = sign(d)*a.s 56 | end 57 | 58 | # get d from rotator which is RR(1,0) or RR(-1, 0) 59 | function getd{T}(a::RealRotator{T}) 60 | c, s = vals(a) 61 | norm(s) <= 4eps(T) || error("a is not a diagonal rotator") 62 | sign(c) 63 | end 64 | 65 | ## This is main case 66 | # Q D Q 67 | # D --> D 68 | 69 | 70 | """ 71 | D --> D 72 | U V 73 | """ 74 | function Dflip{T}(r::ComplexComplexRotator{T}, d::ComplexComplexRotator{T}) 75 | !is_diagonal(d) && error("d must be diagonal rotator") 76 | 77 | # D is fixed, 78 | alpha = d.c 79 | r.s = r.s * conj(alpha) 80 | end 81 | 82 | ## U --> U Da 83 | ## D Da 84 | ## (not the reverse!) 
85 | function Dflip{T}(d::ComplexRealRotator{T}, r::ComplexRealRotator{T}) 86 | # !is_diagonal(d) && error("d must be diagonal rotator") 87 | 88 | alpha = d.c 89 | c,s = vals(r) 90 | vals!(r, c*conj(alpha), s) 91 | end 92 | 93 | ## We have this for left fuse and for deflation 94 | # 95 | # Di Di 96 | # Qi+1 Si+1 Di+1 Si+1 97 | # Qi+2 --> Si+2 Di+2 = Si+2 * diagm([alpha, I, conj(alpha)]) 98 | # ... ... ... ... 99 | # Qj Sj Dj Sj 100 | function cascade(Qs, D, alpha, i, j) 101 | # Q = CR(c,s) -> S = CR(c*conj(alpha), s) 102 | for k in (i+1):j 103 | c,s = vals(Qs[k]) 104 | vals!(Qs[k], c*conj(alpha), s) 105 | end 106 | D[i] *= alpha 107 | D[j+1] *= conj(alpha) 108 | end 109 | 110 | 111 | 112 | 113 | ## Fuse 114 | ## fuse combines two rotations, a and b, into one, 115 | 116 | 117 | ## For ComplexRealRotator, the result of a*b will not have a real sign 118 | ## we output by rotating by alpha. 119 | ## return alpha so a*b = f(ab) * [alpha 0; 0 conj(alpha)] 120 | ## for left with have uv -> (u') Di 121 | function fuse{T}(a::ComplexRealRotator{T}, b::ComplexRealRotator{T},::Type{Val{:left}}) 122 | # idx(a) == idx(b) || error("can't fuse") 123 | u = a.c * b.c - conj(a.s) * b.s 124 | v = conj(a.c) * b.s + a.s * b.c 125 | 126 | alpha = conj(v)/norm(v) 127 | 128 | a.c = u * alpha 129 | a.s = norm(v) 130 | 131 | conj(alpha) 132 | end 133 | 134 | # for right we have uv -> (v') Di 135 | function fuse{T}(a::ComplexRealRotator{T}, b::ComplexRealRotator{T}, ::Type{Val{:right}}) 136 | # idx(a) == idx(b) || error("can't fuse") 137 | u = a.c * b.c - conj(a.s) * b.s 138 | v = conj(a.c) * b.s + a.s * b.c 139 | 140 | 141 | alpha = conj(v)/norm(v) 142 | 143 | b.c = u * alpha 144 | b.s = norm(v) 145 | 146 | conj(alpha) 147 | end 148 | 149 | 150 | ## Fuse for genera rotation 151 | ## We have two functions as it seems a bit faster 152 | fuse{T}(a::Rotator{T}, b::Rotator{T}, dir, d) = fuse(a,b,dir) 153 | 154 | function fuse{T}(a::Rotator{T}, b::Rotator{T},::Type{Val{:left}}) 155 | # idx(a) == 
idx(b) || error("can't fuse") 156 | u = a.c * b.c - conj(a.s) * b.s 157 | a.s = conj(a.c) * b.s + a.s * b.c 158 | a.c = u 159 | 160 | one(T) 161 | end 162 | function fuse{T}(a::Rotator{T}, b::Rotator{T}, ::Type{Val{:right}}) 163 | # idx(a) == idx(b) || error("can't fuse") 164 | u = a.c * b.c - conj(a.s) * b.s 165 | b.s = conj(a.c) * b.s + a.s * b.c 166 | b.c = u 167 | 168 | one(T) 169 | end 170 | 171 | 172 | # Turnover: Q1 Q3 | x x x | Q1 173 | # Q2 = | x x x | = Q3 Q2 <-- misfit=3 Q1, Q2 shift; 174 | # | x x x | 175 | # 176 | # misfit is Val{:right} for <-- (right to left turnover), Val{:left} for --> 177 | # 178 | # This is the key computation once matrices are written as rotators 179 | # We wrote this for complex rotators where sine part may be complex 180 | # so we make use of alpha and beta, which isn't otherwise needed 181 | # could streamline, but doesn't seem to incur an expense 182 | 183 | function _turnover{T}(Q1::Rotator{T}, Q2::Rotator{T}, Q3::Rotator{T}) 184 | # i,j,k = idx(Q1), idx(Q2), idx(Q3) 185 | # (i == k) || error("Need to have a turnover up down up or down up down: have i=$#i, j=$j, k=$k") 186 | # abs(j-i) == 1 || error("Need to have |i-j| == 1") 187 | 188 | c1,s1 = vals(Q1) 189 | c2,s2 = vals(Q2) 190 | c3,s3 = vals(Q3) 191 | 192 | # key is to find U1,U2,U3 with U2'*U1'*U3' * (Q1*Q2*Q3) = I 193 | # do so by three Givens rotations to make (Q1*Q2*Q3) upper triangular 194 | 195 | # initialize c4 and s4 196 | a = conj(c1)*c2*s3 + s1*c3 197 | b = s2*s3 198 | # check norm([a,b]) \approx 1 199 | c4, s4, temp = givensrot(a,b)#, Val{true}) 200 | 201 | # initialize c5 and s5 202 | 203 | a = c1*c3 - conj(s1)*c2*s3 204 | b = temp 205 | # check norm([a,b]) \approx 1 206 | c5, s5, alpha = givensrot(a, b) 207 | 208 | alpha = alpha/norm(alpha) 209 | c5 *= conj(alpha) # make diagonal elements 1 210 | s5 *= alpha 211 | 212 | # second column 213 | u = -c1*conj(s3) - conj(s1)*c2*conj(c3) 214 | v = conj(c1)*c2*conj(c3) - s1*conj(s3) 215 | w = s2 * conj(c3) 216 | 
217 | a = c4*conj(c5)*v - conj(s4)*conj(c5)*w + s5*u 218 | b = conj(c4)*w + s4*v 219 | 220 | c6, s6, beta = givensrot(a,b) 221 | 222 | beta = beta/norm(beta) 223 | c6 *= conj(beta) # make diagonal elements 1 224 | s6 *= beta 225 | 226 | (c4, s4, c5, s5, c6, s6) 227 | end 228 | 229 | 230 | 231 | 232 | function turnover{T}(Q1::Rotator{T}, Q2::Rotator{T}, Q3::Rotator{T}, 233 | ::Type{Val{:right}}) 234 | 235 | c4,s4,c5,s5,c6,s6 = _turnover(Q1,Q2,Q3) 236 | vals!(Q3, conj(c4), -s4) 237 | vals!(Q1, conj(c5), -s5) 238 | vals!(Q2, conj(c6), -s6) 239 | idx!(Q3, idx(Q2)) # misfit is right one 240 | end 241 | 242 | turnover{T}(Q1::Rotator{T}, Q2::Rotator{T}, Q3::Rotator{T}) = turnover(Q1, Q2, Q3, Val{:right}) 243 | 244 | function turnover{T}(Q1::Rotator{T}, Q2::Rotator{T}, Q3::Rotator{T}, 245 | ::Type{Val{:left}}) 246 | 247 | c4,s4,c5,s5,c6,s6 = _turnover(Q1,Q2,Q3) 248 | 249 | vals!(Q2, conj(c4), -s4) 250 | vals!(Q3, conj(c5), -s5) 251 | vals!(Q1, conj(c6), -s6) 252 | idx!(Q1, idx(Q2)) # misfit is left one 253 | end 254 | 255 | 256 | 257 | 258 | 259 | ## passthrough 260 | ## Pass a rotator through a diagonal matrix with phase shifts 261 | ## D U -> U' D' 262 | ## Here D[i] = D[1] 263 | ## usually call with view(state.d, idx(U):idx(U)+1) 264 | function passthrough{T}(D, U::ComplexRealRotator{T}) 265 | alpha, beta = D[1], D[2] 266 | 267 | c, s = vals(U) 268 | u = c * alpha * conj(beta) 269 | v = s 270 | vals!(U, u, v) 271 | D[1], D[2] = beta, alpha 272 | end 273 | 274 | function passthrough{T}(D::ComplexRealRotator{T}, U::ComplexRealRotator{T}) 275 | norm(D.s) <= 1e2*eps(T) || error("D not diagonal") 276 | alpha, ds = vals(D) 277 | c,s = vals(U) 278 | vals!(U, c*alpha*alpha, s) 279 | vals!(D, conj(alpha), ds) 280 | end 281 | -------------------------------------------------------------------------------- /test/runtests.jl: -------------------------------------------------------------------------------- 1 | using AMVW 2 | const A = AMVW 3 | using Base.Test 4 | using Polynomials 5 
| 6 | # transformations 7 | 8 | # givens rotation 9 | @testset "Givens rotations" begin 10 | a,b = complex(1.0, 2.0), complex(2.0, 3.0) 11 | c,s,r = A.givensrot(a,b) 12 | @test norm(([c -conj(s); s conj(c)] * [a,b])[2]) <= 4eps(Float64) 13 | a,b = complex(rand(2)...), complex(rand(2)...) 14 | c,s,r = A.givensrot(a,b) 15 | @test norm(([c -conj(s); s conj(c)] * [a,b])[2]) <= 4eps(Float64) 16 | end 17 | 18 | 19 | 20 | 21 | # dflip 22 | @testset "D flip" begin 23 | t1 = pi/3; 24 | alpha = complex(cos(t1), sin(t1)) 25 | d = AMVW.ComplexComplexRotator(alpha, complex(0.0, 0.0), 1) 26 | u = one(AMVW.ComplexComplexRotator{Float64}) 27 | AMVW.vals!(u, complex(1.0, 2.0), complex(2.0, 3.0)); AMVW.idx!(u, 2) 28 | M = A.as_full(u, 3) * A.as_full(d,3) 29 | A.Dflip(u, d) 30 | M1 = A.as_full(d, 3) * A.as_full(u,3) 31 | u = M - M1 32 | @test maximum(norm.(u)) <= 4eps() 33 | 34 | 35 | 36 | # complexrealrotator is different 37 | # U --> U D 38 | # D D 39 | r1,r2 = ones(AMVW.ComplexRealRotator{Float64},2) 40 | AMVW.vals!(r1, complex(1.0, 2.0), 2.0); AMVW.idx!(r1, 1) 41 | di = one(AMVW.ComplexRealRotator{Float64}) 42 | AMVW.vals!(di, complex(cos(pi/3), sin(pi/3)), 0.0); AMVW.idx!(di, 2) 43 | M = A.as_full(di, 3) * A.as_full(r1, 3) 44 | AMVW.Dflip(di, r1) 45 | dic = copy(di); AMVW.idx!(dic, AMVW.idx(r1)) 46 | M1 = AMVW.as_full(r1, 3) * AMVW.as_full(dic, 3) * AMVW.as_full(di, 3) 47 | @test maximum(norm.(M - M1)) <= 4eps() 48 | 49 | ## D --> D 50 | ## U U D 51 | ## 52 | r1,r2 = ones(AMVW.ComplexRealRotator{Float64},2) 53 | AMVW.vals!(r1, complex(1.0, 2.0), 2.0); AMVW.idx!(r1, 2) 54 | di = one(AMVW.ComplexRealRotator{Float64}) 55 | AMVW.vals!(di, complex(cos(pi/3), sin(pi/3)), 0.0); AMVW.idx!(di, 1) 56 | M = A.as_full(di, 3) * A.as_full(r1, 3) 57 | AMVW.Dflip(di, r1) 58 | dic = copy(di); AMVW.idx!(dic, AMVW.idx(r1)) 59 | M1 = AMVW.as_full(r1, 3) * AMVW.as_full(dic, 3) * AMVW.as_full(di, 3) 60 | @test maximum(norm.(M - M1)) <= 4eps() 61 | 62 | ## 63 | ## Q --> D Q 64 | ## D D 65 | r1,r2 = 
ones(AMVW.ComplexRealRotator{Float64},2) 66 | AMVW.vals!(r1, complex(1.0, 2.0), 2.0); AMVW.idx!(r1, 1) 67 | di = one(AMVW.ComplexRealRotator{Float64}) 68 | AMVW.vals!(di, complex(cos(pi/3), sin(pi/3)), 0.0); AMVW.idx!(di, 2) 69 | M = A.as_full(r1, 3) * A.as_full(di, 3) 70 | AMVW.Dflip(di, r1) 71 | dic = copy(di); AMVW.idx!(dic, AMVW.idx(r1)) 72 | M1 = AMVW.as_full(dic, 3) * AMVW.as_full(di, 3) * AMVW.as_full(r1, 3) 73 | @test maximum(norm.(M - M1)) <= 4eps() 74 | 75 | end 76 | 77 | @testset "Fuse" begin 78 | ## 79 | # fuse 80 | r1,r2 = ones(AMVW.ComplexComplexRotator{Float64},2) 81 | AMVW.vals!(r1, complex(1.0, 2.0), complex(2.0, 3.0)); AMVW.idx!(r1, 1) 82 | AMVW.vals!(r2, complex(3.0, 2.0), complex(5.0, 3.0)); AMVW.idx!(r2, 1) 83 | M = A.as_full(r1,2) * A.as_full(r2, 2) 84 | A.fuse(r1, r2, Val{:left}) 85 | M1 = A.as_full(r1, 2) 86 | u = M - M1 87 | @test maximum(norm.(u)) <= 4eps() 88 | 89 | 90 | 91 | r1,r2 = ones(AMVW.ComplexRealRotator{Float64},2) 92 | AMVW.vals!(r1, complex(1.0, 2.0), 2.0); AMVW.idx!(r1, 1) 93 | AMVW.vals!(r2, complex(3.0, 2.0), 5.0); AMVW.idx!(r2, 1) 94 | M = A.as_full(r1,2) * A.as_full(r2, 2) 95 | alpha = A.fuse(r1, r2, Val{:left}) 96 | M1 = A.as_full(r1, 2) * diagm([alpha, conj(alpha)]) 97 | u = M - M1 98 | @test maximum(norm.(u)) <= 4eps() 99 | 100 | 101 | r1,r2 = ones(AMVW.ComplexRealRotator{Float64},2) 102 | AMVW.vals!(r1, complex(1.0, 2.0), 2.0); AMVW.idx!(r1, 1) 103 | AMVW.vals!(r2, complex(3.0, 2.0), 5.0); AMVW.idx!(r2, 1) 104 | M = A.as_full(r1,2) * A.as_full(r2, 2) 105 | alpha = A.fuse(r1, r2, Val{:right}) 106 | M1 = A.as_full(r2, 2) * diagm([alpha, conj(alpha)]) 107 | u = M - M1 108 | @test maximum(norm.(u)) <= 4eps() 109 | 110 | end 111 | 112 | ## Cascade 113 | @testset "Cascade" begin 114 | D1, Q1, Q2,Q3,Q4 = ones(AMVW.ComplexRealRotator{Float64},5) 115 | alpha = complex(1.0, -1.0) 116 | alpha = alpha/norm(alpha) 117 | AMVW.vals!(D1, alpha, 0.0); AMVW.idx!(D1, 1) 118 | AMVW.vals!(Q2, complex(1.0, 2.0), 2.0); AMVW.idx!(Q2, 2) 119 | 
AMVW.vals!(Q3, complex(3.0, 2.0), 5.0); AMVW.idx!(Q3, 3) 120 | AMVW.vals!(Q4, complex(2.0, 2.0), 3.0); AMVW.idx!(Q4, 4) 121 | 122 | M1 = A.as_full(D1, 5) * A.as_full(Q2, 5) * A.as_full(Q3, 5) * A.as_full(Q4, 5) 123 | D = ones(Complex{Float64}, 5) 124 | Qs = [Q1, Q2, Q3, Q4] 125 | A.cascade(Qs, D, alpha, 1, 4) 126 | M2 = A.as_full(Q2, 5) * A.as_full(Q3, 5) * A.as_full(Q4, 5) * diagm(D) 127 | 128 | u = M1 - M2 129 | @test maximum(norm.(u)) <= 4eps() 130 | 131 | end 132 | 133 | ## 134 | # turnover: each turnover must leave the product of the three rotators unchanged 135 | @testset "Turnover" begin 136 | r1,r2,r3 = ones(AMVW.ComplexComplexRotator{Float64}, 3) 137 | AMVW.vals!(r1, complex(1.0, 2.0), complex(2.0, 3.0)); AMVW.idx!(r1, 1) 138 | AMVW.vals!(r2, complex(3.0, 2.0), complex(5.0, 3.0)); AMVW.idx!(r2, 2) 139 | AMVW.vals!(r3, complex(4.0, 2.0), complex(6.0, 3.0)); AMVW.idx!(r3, 1) 140 | 141 | M = A.as_full(r1,3) * A.as_full(r2,3) * A.as_full(r3,3) 142 | A.turnover(r1, r2, r3, Val{:right}) 143 | M1 = A.as_full(r3,3) * A.as_full(r1,3) * A.as_full(r2,3) 144 | u = M - M1 145 | @test maximum(norm.(u)) <= 4eps() 146 | 147 | 148 | 149 | r1,r2,r3 = ones(AMVW.ComplexRealRotator{Float64}, 3) 150 | AMVW.vals!(r1, complex(1.0, 2.0), 2.0); AMVW.idx!(r1, 1) 151 | AMVW.vals!(r2, complex(3.0, 2.0), 5.0); AMVW.idx!(r2, 2) 152 | AMVW.vals!(r3, complex(4.0, 2.0), 6.0); AMVW.idx!(r3, 1) 153 | 154 | M = A.as_full(r1,3) * A.as_full(r2,3) * A.as_full(r3,3) 155 | A.turnover(r1, r2, r3, Val{:right}) 156 | M1 = A.as_full(r3,3) * A.as_full(r1,3) * A.as_full(r2,3) 157 | u = M - M1 158 | @test maximum(norm.(u)) <= 4eps() 159 | A.turnover(r3, r1, r2, Val{:left}) 160 | M2 = A.as_full(r1,3) * A.as_full(r2,3) * A.as_full(r3,3) 161 | u = M1 - M2 # compare the product before and after the left turnover 162 | @test maximum(norm.(u)) <= 4eps() 163 | 164 | end 165 | 166 | 167 | # passthrough 168 | @testset "Passthrough" begin 169 | r1,r2,r3 = ones(AMVW.ComplexRealRotator{Float64}, 3) 170 | AMVW.vals!(r1, complex(1.0, 2.0), 2.0); AMVW.idx!(r1, 1) 171 | t1,t2,t3=pi/3,pi/4, pi/5 172 | cplx(t) = complex(cos(t), sin(t)) 173 | D =
[cplx(t) for t in [t1,t2,t3]]
    M = diagm(D) * AMVW.as_full(r1, 3)
    AMVW.passthrough(D, r1)
    M1 = AMVW.as_full(r1, 3) * diagm(D)
    u = M - M1
    @test maximum(norm.(u)) <= 4eps()


end


@testset "poly_roots" begin

    ## real coeffs
    rs = [1.0, 2.0, 3.0]
    p = poly(rs)
    rts = AMVW.poly_roots(p.a)
    sort!(rts, by=norm)
    @test maximum(norm.(rts - rs)) <= 1e-6

    # complex coeffs
    rs = [1.0, 2.0, 3.0, 4.0 + 1.0im]
    p = poly(rs)
    rts = AMVW.poly_roots(p.a)
    sort!(rts, by=norm)
    @test maximum(norm.(rts - rs)) <= 1e-6


    # ComplexComplex
    rs = [1.0, 2.0, 3.0, 4.0+0im]
    p = poly(rs)
    qs, k = AMVW.reverse_poly(p.a)
    state = AMVW.ComplexComplexSingleShift(qs)
    AMVW.init_state(state)
    AMVW.AMVW_algorithm(state)
    rts = complex.(state.REIGS, state.IEIGS)
    sort!(rts, by=norm)
    @test maximum(norm.(rts - rs)) <= 1e-6

    ## simple cases
    # n=1
    rs = [1.0]
    p = poly(rs)
    rts = AMVW.poly_roots(p.a)
    @test maximum(norm.(rts - rs)) <= 1e-6

    rs = [1.0 + im]
    p = poly(rs)
    rts = AMVW.poly_roots(p.a)
    @test maximum(norm.(rts - rs)) <= 1e-6

    # n = 2
    rs = [1.0, 2.0]
    p = poly(rs)
    rts = AMVW.poly_roots(p.a)
    sort!(rts, by=norm)
    @test maximum(norm.(rts - rs)) <= 1e-6

    rs = [1.0, 2.0+im]
    p = poly(rs)
    rts = AMVW.poly_roots(p.a)
    sort!(rts, by=norm)
    @test maximum(norm.(rts - rs)) <= 1e-6

    # zeros
    rs = [1.0, 2.0, 3.0, 0.0, 0.0]
    sort!(rs, by=norm)
    p = poly(rs)
    rts = AMVW.poly_roots(p.a)
    sort!(rts, by=norm)
    @test maximum(norm.(rts - rs)) <= 1e-6

    rs = [1.0, 2.0, 3.0+im, 0.0, 0.0]
    sort!(rs, by=norm)
    p = poly(rs)
    rts = AMVW.poly_roots(p.a)
    sort!(rts, by=norm)
    @test maximum(norm.(rts - rs)) <= 1e-6

end

--------------------------------------------------------------------------------
/src/types.jl:
--------------------------------------------------------------------------------
## Types

## A container for our counters
mutable struct AMVW_Counter
    zero_index::Int
    start_index::Int
    stop_index::Int
    it_count::Int
    tr::Int
end


## Rotators

## Our rotators have field names c, s where c and s are either T or Complex{T}
@compat abstract type CoreTransform{T} end
@compat abstract type Rotator{T} <: CoreTransform{T} end

## a rotator counts as diagonal when its sine term is no larger than eps(T)
is_diagonal(r::Rotator{T}) where {T} = norm(r.s) <= eps(T)


## `Rotator` is abstract and cannot be constructed, so the copy is built from the
## concrete type of `a`; this relies on the concrete rotator having fields (c, s, i)
Base.copy(a::Rotator) = typeof(a)(a.c, a.s, a.i)

## overwrite `a` with the values and index of `b`
function Base.copy!(a::Rotator, b::Rotator)
    vals!(a, vals(b)...)
    idx!(a, idx(b))
end

## get values / index, set index
vals(r::Rotator{T}) where {T} = (r.c, r.s)
idx(r::Rotator) = r.i
idx!(r::Rotator, i::Int) = r.i = i




# the index is superfluous for now, and a bit of a hassle to keep immutable
# but might be of help later if twisting is approached. Shouldn't affect speed, but does mean 3N storage (Q, Ct, B)
# so may be
#
mutable struct RealRotator{T} <: Rotator{T}
    c::T
    s::T
    i::Int
end

## conjugate transpose: same cosine, negated sine, same index
function Base.ctranspose(r::RealRotator)
    RealRotator(r.c, -r.s, r.i)
end


## identity rotator (c = 1, s = 0) with index 0, and a vector of N of them
Base.one(::Type{RealRotator{T}}) where {T} = RealRotator(one(T), zero(T), 0)
Base.ones(S::Type{RealRotator{T}}, N) where {T} = [one(S) for i in 1:N]

## set values
function vals!(r::RealRotator, c::T, s::T) where {T}
    # normalize in case of roundoff errors
    # but, using heuristic on 6.3 on square roots:
    # only rescale when the norm differs from 1 by at least 100 eps(T)

    nrmi = sqrt(c^2 + s^2 )
    nrmi = norm(nrmi - one(T)) >= 1e2*eps(T) ? inv(nrmi) : one(T)
    r.c = c * nrmi
    r.s = s * nrmi
end

##################################################
### Okay, now try with complex C, real s

mutable struct ComplexRealRotator{T} <: Rotator{T}
    c::Complex{T}
    s::T
    i::Int
end

## conjugate transpose: conjugated cosine, negated sine, same index
function Base.ctranspose(r::ComplexRealRotator)
    ComplexRealRotator(conj(r.c), -r.s, r.i)
end

function vals!(r::ComplexRealRotator, c::Complex{T}, s::T) where {T}
    # normalize in case of roundoff errors
    # but, using heuristic on 6.3 on square roots:
    # only rescale when the norm differs from 1 by at least eps(T)

    nrmi = sqrt(abs(c * conj(c) + s^2))
    nrmi = norm(nrmi - one(T)) >= eps(T) ? inv(nrmi) : one(T)
    r.c = c * nrmi
    r.s = s * nrmi
end
## a complex sine is accepted, but only its real part is used
function vals!(r::ComplexRealRotator, c::Complex{T}, s::Complex{T}) where {T}
    ## abs(imag(s)) < 4eps(T) || error("setting vals needs real s, got $s")
    vals!(r, c, real(s))
end
## a real cosine is promoted to complex
vals!(r::ComplexRealRotator{T}, c::T, s::T) where {T} = vals!(r, complex(c, zero(T)), s)

Base.one(::Type{ComplexRealRotator{T}}) where {T} = ComplexRealRotator(complex(one(T), zero(T)), zero(T), 0)
Base.ones(S::Type{ComplexRealRotator{T}}, N) where {T} = [one(S) for i in 1:N]



Base.copy(a::ComplexRealRotator) = ComplexRealRotator(a.c, a.s, a.i)
function Base.copy!(a::ComplexRealRotator, b::ComplexRealRotator)
    vals!(a, vals(b)...)
    idx!(a, idx(b))
end




##################################################
## We use two complex, rather than 3 reals here.
## Will be basically the same storage, as we don't need to include a D, but not quite (12N, not 11N)

mutable struct ComplexComplexRotator{T} <: Rotator{T}
    c::Complex{T}
    s::Complex{T}
    i::Int
end

## conjugate transpose: conjugated cosine, negated sine, same index
function Base.ctranspose(r::ComplexComplexRotator)
    ComplexComplexRotator(conj(r.c), -r.s, r.i)
end


## identity rotator (c = 1, s = 0) with index 0, and a vector of N of them
Base.one(::Type{ComplexComplexRotator{T}}) where {T} = ComplexComplexRotator(complex(one(T), zero(T)), complex(zero(T), zero(T)), 0)
Base.ones(S::Type{ComplexComplexRotator{T}}, N) where {T} = [one(S) for i in 1:N]

## set values
function vals!(r::ComplexComplexRotator, c::Complex{T}, s::Complex{T}) where {T}
    # normalize in case of roundoff errors
    # but, using heuristic on 6.3 on square roots:
    # only rescale when the norm differs from 1 by at least eps(T)

    nrmi = sqrt(abs(c * conj(c) + s * conj(s)))
    nrmi = norm(nrmi - one(T)) >= eps(T) ? inv(nrmi) : one(T)
    r.c = c * nrmi
    r.s = s * nrmi
end
## real arguments are promoted to complex before being stored
vals!(r::ComplexComplexRotator, c::Complex{T}, s::T) where {T} = vals!(r, c, complex(s,zero(T)))
vals!(r::ComplexComplexRotator{T}, c::T, s::T) where {T} = vals!(r, complex(c, zero(T)), complex(s, zero(T)))





### Shift Types


@compat abstract type ShiftType{T} end
struct RealDoubleShift{T} <: ShiftType{T}
    N::Int
    POLY::Vector{T}
    Q::Vector{RealRotator{T}}
    Ct::Vector{RealRotator{T}}  # We use C', not C here
    B::Vector{RealRotator{T}}
    REIGS::Vector{T}
    IEIGS::Vector{T}
    ## reusable storage
    U::RealRotator{T}
    Ut::RealRotator{T}
    V::RealRotator{T}
    Vt::RealRotator{T}
    W::RealRotator{T}
    A::Matrix{T}    # for parts of A = QR
    Bk::Matrix{T}   # for diagonal block
    R::Matrix{T}    # temp storage, sometimes R part of QR
    e1::Vector{T}   # eigen values e1, e2
    e2::Vector{T}
    ctrs::AMVW_Counter
end

## build a state with identity rotators, zeroed storage and fresh counters
function Base.convert(::Type{RealDoubleShift}, ps::Vector{T}) where {T}
    N = length(ps)

    RealDoubleShift(N, ps,
                    ones(RealRotator{T}, N), #Q
                    ones(RealRotator{T}, N), #Ct
                    ones(RealRotator{T}, N), #B
                    zeros(T, N), zeros(T, N), #EIGS
                    one(RealRotator{T}), one(RealRotator{T}),
                    one(RealRotator{T}), one(RealRotator{T}),
                    one(RealRotator{T}), #U,U',V,V',W
                    zeros(T, 2, 2),zeros(T, 3, 2),zeros(T, 3, 2), # A Bk R
                    zeros(T,2), zeros(T,2),
                    AMVW_Counter(0,1,N-1, 0, N-2)
                    )
end

#######################################################
## State for ComplexReal type

mutable struct ComplexRealSingleShift{T} <: ShiftType{T}
    N::Int
    POLY::Vector{Complex{T}}
    Q::Vector{ComplexRealRotator{T}}
    Ct::Vector{ComplexRealRotator{T}}  # We use C', not C here
    B::Vector{ComplexRealRotator{T}}
    D::Vector{Complex{T}}
    Dp::Vector{Complex{T}}
    REIGS::Vector{T}
    IEIGS::Vector{T}
    ## reusable storage
    U::ComplexRealRotator{T}
    Ut::ComplexRealRotator{T}
    Di::ComplexRealRotator{T}
    A::Matrix{Complex{T}}    # for parts of A = QR
    Bk::Matrix{Complex{T}}   # for diagonal block
    R::Matrix{Complex{T}}    # temp storage, sometimes R part of QR
    e1::Vector{T}   # eigen values e1, e2, store as (re,imag)
    e2::Vector{T}
    ray::Bool
    ctrs::AMVW_Counter
end

## build a state with identity rotators, D and Dp filled with ones, and fresh counters
function Base.convert(::Type{ComplexRealSingleShift}, ps::Vector{Complex{T}}) where {T}
    N = length(ps)

    ComplexRealSingleShift(N, ps,
                           ones(ComplexRealRotator{T}, N), #Q
                           ones(ComplexRealRotator{T}, N), #Ct
                           ones(ComplexRealRotator{T}, N), #B
                           ones(Complex{T}, N+1), # D
                           ones(Complex{T}, 2), # Dp ## XXX try to cut allocations in passthrough
                           zeros(T, N), zeros(T, N), #EIGS
                           one(ComplexRealRotator{T}), one(ComplexRealRotator{T}), #U, Ut
                           one(ComplexRealRotator{T}), # Di
                           zeros(Complex{T}, 2, 2),zeros(Complex{T}, 3, 2),
                           zeros(Complex{T}, 3, 2), # A Bk R
                           zeros(T,2), zeros(T,2),
                           # true, # true for Wilkinson, false for Rayleigh.Make adjustable!
                           false,
                           AMVW_Counter(0,1,N-1, 0, N-2)
                           )
end

##################################################
## State for ComplexComplex Rotator type (no D)

struct ComplexComplexSingleShift{T} <: ShiftType{T}
    N::Int
    POLY::Vector{Complex{T}}
    Q::Vector{ComplexComplexRotator{T}}
    Ct::Vector{ComplexComplexRotator{T}}  # We use C', not C here
    B::Vector{ComplexComplexRotator{T}}
    REIGS::Vector{T}
    IEIGS::Vector{T}
    ## reusable storage
    U::ComplexComplexRotator{T}
    Ut::ComplexComplexRotator{T}
    A::Matrix{Complex{T}}    # for parts of A = QR
    Bk::Matrix{Complex{T}}   # for diagonal block
    R::Matrix{Complex{T}}    # temp storage, sometimes R part of QR
    e1::Vector{T}   # eigen values e1, e2, store as (re,imag)
    e2::Vector{T}
    ray::Bool
    ctrs::AMVW_Counter
end

## build a state with identity rotators, zeroed storage and fresh counters
function Base.convert(::Type{ComplexComplexSingleShift}, ps::Vector{Complex{T}}) where {T}
    N = length(ps)

    ComplexComplexSingleShift(N, ps,
                              ones(ComplexComplexRotator{T}, N), #Q
                              ones(ComplexComplexRotator{T}, N), #Ct
                              ones(ComplexComplexRotator{T}, N), #B
                              zeros(T, N), zeros(T, N), #EIGS
                              one(ComplexComplexRotator{T}), one(ComplexComplexRotator{T}), #U, Ut
                              zeros(Complex{T}, 2, 2),zeros(Complex{T}, 3, 2),
                              zeros(Complex{T}, 3, 2), # A Bk R
                              zeros(T,2), zeros(T,2),
                              false, # true for Wilkinson, false for Rayleigh
                              AMVW_Counter(0,1,N-1, 0, N-2)
                              )
end


--------------------------------------------------------------------------------
/test/benchmark.jl:
--------------------------------------------------------------------------------
using AMVW
using BenchmarkTools
using Polynomials
using PolynomialRoots
using DataFrames

# Compare computed roots `rts` against the exact roots `rs`.
# Returns [r0, r1, r2]:
#   r0 = largest difference between the two root sets after sorting both by norm
#   r1 = max |P(lambda)/P'(lambda)|
#   r2 = max |P(lambda)/P'(lambda)/lambda|
function _residual_check(rs, rts)
    p = poly(rs)
    pp = polyder(p)

    # P(lambda)/P'(lambda) at every computed root; shared by r1 and r2
    newton = p.(rts) ./ pp.(rts)

    r0 = maximum(norm.(sort(rs, by=norm) - sort(rts, by=norm))) # nonsensical for some cases
    r1 = maximum(norm.(newton))
    r2 = maximum(norm.(newton ./ rts))

    [r0, r1, r2]
end

# Tabulate the three residuals for Polynomials.roots, AMVW.poly_roots and
# PolynomialRoots.roots on the polynomial whose roots are `rs`.
function residual_check(rs)
    A = DataFrame(Polynomials=zeros(3),AMVW=zeros(3),PolynomialRoots=zeros(3))

    p = poly(rs)
    A[:,1] = _residual_check(rs, Polynomials.roots(p))
    A[:,2] = _residual_check(rs, AMVW.poly_roots(p.a))
    A[:,3] = _residual_check(rs, PolynomialRoots.roots(p.a))

    A
end


# small n, real
n = 10
rs = linspace(1/n, 1, n)
p = poly(rs)

@benchmark Polynomials.roots(p)
# julia> @benchmark Polynomials.roots(p)
# BenchmarkTools.Trial:
#   memory estimate:  8.20 KiB
#   allocs estimate:  50
#   --------------
#   minimum time:     98.108 μs (0.00% GC)
#   median time:      102.220 μs (0.00% GC)
#   mean time:        108.525 μs (1.16% GC)
#   maximum time:     3.642 ms (94.16% GC)
#   --------------
#   samples:          10000
#   evals/sample:     1
#   time tolerance:   5.00%
#   memory tolerance: 1.00%

@benchmark AMVW.poly_roots(p.a)
# julia> @benchmark AMVW.poly_roots(p.a)
# BenchmarkTools.Trial:
#   memory estimate:  4.27 KiB
#   allocs estimate:  66
#   --------------
#   minimum time:     93.309 μs (0.00% GC)
#   median time:      150.606 μs (0.00% GC)
#   mean time:        155.830 μs (0.26% GC)
#   maximum time:     4.237 ms (94.03% GC)
#   --------------
#   samples:          10000
#   evals/sample:     1
#   time tolerance:   5.00%
#   memory tolerance: 1.00%




@benchmark PolynomialRoots.roots(p.a)

# with deprecation warnings
# BenchmarkTools.Trial:
#   memory estimate:  12.20 KiB
#   allocs estimate:  77
#   --------------
#   minimum time:     229.386 μs (0.00% GC)
#   median time:      241.773
μs (0.00% GC) 82 | # mean time: 253.786 μs (0.62% GC) 83 | # maximum time: 8.150 ms (0.00% GC) 84 | # -------------- 85 | # samples: 10000 86 | # evals/sample: 1 87 | # time tolerance: 5.00% 88 | # memory tolerance: 1.00% 89 | 90 | # Complex polynomials 91 | rs = [x+im for x in 1.0 : 6] 92 | p = poly(rs) 93 | 94 | @benchmark Polynomials.roots(p) 95 | 96 | # julia> @benchmark Polynomials.roots(p) 97 | # BenchmarkTools.Trial: 98 | # memory estimate: 5.59 KiB 99 | # allocs estimate: 52 100 | # -------------- 101 | # minimum time: 33.073 μs (0.00% GC) 102 | # median time: 34.435 μs (0.00% GC) 103 | # mean time: 36.119 μs (1.96% GC) 104 | # maximum time: 3.713 ms (95.31% GC) 105 | # -------------- 106 | # samples: 10000 107 | # evals/sample: 1 108 | # time tolerance: 5.00% 109 | # memory tolerance: 1.00% 110 | 111 | @benchmark AMVW.poly_roots(p.a) 112 | # BenchmarkTools.Trial: 113 | # memory estimate: 4.66 KiB 114 | # allocs estimate: 56 115 | # -------------- 116 | # minimum time: 102.511 μs (0.00% GC) 117 | # median time: 143.153 μs (0.00% GC) 118 | # mean time: 157.052 μs (0.70% GC) 119 | # maximum time: 49.993 ms (0.00% GC) 120 | # -------------- 121 | # samples: 10000 122 | # evals/sample: 1 123 | # time tolerance: 5.00% 124 | # memory tolerance: 1.00% 125 | 126 | @benchmark PolynomialRoots.roots(p.a) 127 | # BenchmarkTools.Trial: 128 | # memory estimate: 11.63 KiB 129 | # allocs estimate: 76 130 | # -------------- 131 | # minimum time: 226.424 μs (0.00% GC) 132 | # median time: 232.025 μs (0.00% GC) 133 | # mean time: 248.389 μs (0.89% GC) 134 | # maximum time: 7.717 ms (0.00% GC) 135 | # -------------- 136 | # samples: 10000 137 | # evals/sample: 1 138 | # time tolerance: 5.00% 139 | # memory tolerance: 1.00% 140 | 141 | ## XXX We have an issue with this one though 142 | n = 10 143 | ts = linspace(1/n, 1.0, n) * 2pi 144 | rs = [complex(cos(t), sin(t)) for t in ts] 145 | p = poly(rs) 146 | 147 | @benchmark Polynomials.roots(p) 148 | # julia> @benchmark 
PolynomialRoots.roots(p.a) 149 | # BenchmarkTools.Trial: 150 | # memory estimate: 11.95 KiB 151 | # allocs estimate: 76 152 | # -------------- 153 | # minimum time: 301.440 μs (0.00% GC) 154 | # median time: 306.313 μs (0.00% GC) 155 | # mean time: 326.734 μs (0.61% GC) 156 | # maximum time: 4.879 ms (91.35% GC) 157 | # -------------- 158 | # samples: 10000 159 | # evals/sample: 1 160 | # time tolerance: 5.00% 161 | # memory tolerance: 1.00% 162 | 163 | @benchmark AMVW.poly_roots(p.a) # use state.ray=false 164 | # ## XXX Stil misses alot 165 | # BenchmarkTools.Trial: 166 | # memory estimate: 5.88 KiB 167 | # allocs estimate: 68 168 | # -------------- 169 | # minimum time: 354.586 μs (0.00% GC) 170 | # median time: 401.278 μs (0.00% GC) 171 | # mean time: 443.340 μs (0.30% GC) 172 | # maximum time: 6.563 ms (90.24% GC) 173 | # -------------- 174 | # samples: 10000 175 | # evals/sample: 1 176 | # time tolerance: 5.00% 177 | # memory tolerance: 1.00% 178 | 179 | 180 | @benchmark PolynomialRoots.roots(p.a) 181 | 182 | # julia> @benchmark PolynomialRoots.roots(p.a) 183 | # BenchmarkTools.Trial: 184 | # memory estimate: 11.95 KiB 185 | # allocs estimate: 76 186 | # -------------- 187 | # minimum time: 302.533 μs (0.00% GC) 188 | # median time: 309.085 μs (0.00% GC) 189 | # mean time: 330.070 μs (0.60% GC) 190 | # maximum time: 5.024 ms (88.43% GC) 191 | # -------------- 192 | # samples: 10000 193 | # evals/sample: 1 194 | # time tolerance: 5.00% 195 | # memory tolerance: 1.00% 196 | 197 | # "Big" polynomials 198 | n = 10 199 | rs = linspace(1/n, big(1.0), n) 200 | 201 | p = poly(rs) 202 | 203 | @benchmark Polynomials.roots(p) 204 | ## Error (no support for big) 205 | 206 | @benchmark AMVW.poly_roots(p.a) 207 | # julia> @benchmark AMVW.poly_roots(p.a) 208 | # BenchmarkTools.Trial: 209 | # memory estimate: 10.14 MiB 210 | # allocs estimate: 204777 211 | # -------------- 212 | # minimum time: 26.071 ms (0.00% GC) 213 | # median time: 56.661 ms (33.13% GC) 214 | # mean time: 
52.697 ms (24.94% GC) 215 | # maximum time: 74.463 ms (25.16% GC) 216 | # -------------- 217 | # samples: 95 218 | # evals/sample: 1 219 | # time tolerance: 5.00% 220 | # memory tolerance: 1.00% 221 | 222 | # julia> maximum(norm.(sort(AMVW.poly_roots(p.a), by=norm) - sort(rs, by=norm))) 223 | # 3.542606431077360733112308774961528146389295822293876354568756300943283335489686e-71 224 | 225 | @benchmark PolynomialRoots.roots(p.a) 226 | # BenchmarkTools.Trial: 227 | # memory estimate: 1.25 MiB 228 | # allocs estimate: 26226 229 | # -------------- 230 | # minimum time: 1.669 ms (0.00% GC) 231 | # median time: 1.873 ms (0.00% GC) 232 | # mean time: 3.034 ms (36.15% GC) 233 | # maximum time: 26.625 ms (91.54% GC) 234 | # -------------- 235 | # samples: 1621 236 | # evals/sample: 1 237 | # time tolerance: 5.00% 238 | # memory tolerance: 1.00% 239 | 240 | # julia> maximum(norm.(sort(PolynomialRoots.roots(p.a), by=norm) - sort(rs, by=norm))) 241 | # 1.154958001715555551976043766371611436643198308737886091026144983582996624795927e-72 242 | 243 | ################################################## 244 | 245 | # larger n (50) 246 | 247 | n = 50 248 | rs = linspace(1/n, 1, n) 249 | p = poly(rs) 250 | 251 | @benchmark Polynomials.roots(p) 252 | # julia> @benchmark Polynomials.roots(p) 253 | # BenchmarkTools.Trial: 254 | # memory estimate: 61.78 KiB 255 | # allocs estimate: 82 256 | # -------------- 257 | # minimum time: 1.006 ms (0.00% GC) 258 | # median time: 1.073 ms (0.00% GC) 259 | # mean time: 1.128 ms (0.57% GC) 260 | # maximum time: 4.822 ms (65.08% GC) 261 | # -------------- 262 | # samples: 4306 263 | # evals/sample: 1 264 | # time tolerance: 5.00% 265 | # memory tolerance: 1.00% 266 | 267 | 268 | @benchmark AMVW.poly_roots(p.a) 269 | # julia> @benchmark AMVW.poly_roots(p.a) 270 | # BenchmarkTools.Trial: 271 | # memory estimate: 12.73 KiB 272 | # allocs estimate: 186 273 | # -------------- 274 | # minimum time: 1.556 ms (0.00% GC) 275 | # median time: 2.432 ms (0.00% GC) 
276 | # mean time: 2.515 ms (0.08% GC) 277 | # maximum time: 8.164 ms (46.20% GC) 278 | # -------------- 279 | # samples: 1962 280 | # evals/sample: 1 281 | # time tolerance: 5.00% 282 | # memory tolerance: 1.00% 283 | 284 | @benchmark PolynomialRoots.roots(p.a) 285 | # julia> @benchmark PolynomialRoots.roots(p.a) 286 | # BenchmarkTools.Trial: 287 | # memory estimate: 14.72 KiB 288 | # allocs estimate: 77 289 | # -------------- 290 | # minimum time: 422.714 μs (0.00% GC) 291 | # median time: 426.969 μs (0.00% GC) 292 | # mean time: 446.475 μs (0.53% GC) 293 | # maximum time: 4.512 ms (88.61% GC) 294 | # -------------- 295 | # samples: 10000 296 | # evals/sample: 1 297 | # time tolerance: 5.00% 298 | # memory tolerance: 1.00% 299 | 300 | ################################################## 301 | 302 | # residual check 303 | n = 10; 304 | rs = linspace(1/n, 1, n); 305 | p = poly(rs); 306 | residual_check(rs) 307 | 308 | # julia> residual_check(rs) 309 | # 3×3 DataFrames.DataFrame 310 | # │ Row │ Polynomials │ AMVW │ PolynomialRoots │ 311 | # ├─────┼─────────────┼────────────┼─────────────────┤ 312 | # │ 1 │ 2.09855e-10 │ 1.18857e-9 │ 5.75882e-11 │ 313 | # │ 2 │ 1.4072e-10 │ 1.13841e-9 │ 3.38759e-11 │ 314 | # │ 3 │ 2.01029e-10 │ 2.06593e-9 │ 4.23449e-11 │ 315 | 316 | n = 15; 317 | rs = linspace(1/n, 1, n); 318 | p = poly(rs); 319 | residual_check(rs) 320 | 321 | # julia> residual_check(rs) 322 | # 3×3 DataFrames.DataFrame 323 | # │ Row │ Polynomials │ AMVW │ PolynomialRoots │ 324 | # ├─────┼─────────────┼────────────┼─────────────────┤ 325 | # │ 1 │ 2.23049e-6 │ 2.742e-5 │ 1.25911e-7 │ 326 | # │ 2 │ 2.30464e-6 │ 2.74255e-5 │ 2.07169e-7 │ 327 | # │ 3 │ 3.14269e-6 │ 4.83827e-5 │ 3.10753e-7 │ 328 | 329 | 330 | n = 50; 331 | rs = linspace(1/n, 1, n); 332 | p = poly(rs); 333 | residual_check(rs) 334 | 335 | # julia> residual_check(rs) 336 | # 3×3 DataFrames.DataFrame 337 | # │ Row │ Polynomials │ AMVW │ PolynomialRoots │ 338 | # 
├─────┼─────────────┼────────────┼─────────────────┤
# │ 1   │ 0.943682    │ 0.640697   │ 0.594116        │
# │ 2   │ 0.0213801   │ 0.0183637  │ 0.0446998       │
# │ 3   │ NaN         │ 1.93315e10 │ 0.0488876       │

n = 10
ts = linspace(1/n, 1.0, n) * 2pi
rs = [complex(cos(t), sin(t)) for t in ts]
p = poly(rs)
residual_check(rs)

# 3×3 DataFrames.DataFrame
# │ Row │ Polynomials │ AMVW        │ PolynomialRoots │
# ├─────┼─────────────┼─────────────┼─────────────────┤
# │ 1   │ 1.90211     │ 1.90211     │ 1.90211         │
# │ 2   │ 3.39034e-15 │ 3.52069e-15 │ 3.5958e-16      │
# │ 3   │ 3.39034e-15 │ 3.52069e-15 │ 3.5958e-16      │


--------------------------------------------------------------------------------
/src/factorization.jl:
--------------------------------------------------------------------------------
##
## initial factorization
## This is for complex real where we have a D matrix for phases
##
## Overwrites state.Q, state.Ct, state.B and state.D from state.POLY:
## Q[1:N-1] get (c, s) = (0, 1), Q[N] is the identity, and the phase `alpha`
## of the first Givens cosine is stored in D[N] (its conjugate in D[N+1]).
function QDCB_factorization(state::ComplexRealSingleShift{T}) where {T}

    N, ps = state.N, state.POLY
    par = iseven(N) ? one(T) : -one(T)


    Q, Ct, B = state.Q, state.Ct, state.B
    # reset D in place (no temporary array)
    fill!(state.D, one(Complex{T}))

    for ii = 1:(N-1)
        vals!(Q[ii], zero(Complex{T}), one(T))
        idx!(Q[ii], ii)
    end
    vals!(Q[N], one(Complex{T}), zero(T))
    idx!(Q[N], N)


    ## Working, but not quite what is in DFCC code
    ## there -par*ps[N], par*one(T); C is -conj(c), -s
    ## B[N] = par, -par...
    c, s, temp = givensrot(par * conj(ps[N]), -par * one(T)) # <<<- conj(ps[N])!!

    nrm = norm(c)
    alpha = c/nrm   # unit-modulus phase of c

    vals!(Ct[N], conj(c), -s)
    idx!(Ct[N], N)

    vals!(B[N], -par*s*alpha, par*nrm)
    idx!(B[N], N)

    state.D[N] = alpha
    state.D[N+1] = conj(alpha)

    for ii in 2:N
        c, s, temp = givensrot(-ps[ii-1], temp)
        vals!(Ct[N-ii + 1], conj(c*alpha), -s) # note alpha
        idx!(Ct[N-ii+1], N-ii+1)

        vals!(B[N-ii + 1], c*alpha, s)  # note alpha
        idx!(B[N-ii+1], N-ii+1)
    end
end
init_state(state::ComplexRealSingleShift{T}) where {T} = QDCB_factorization(state)

##
## initial factorization for
## RealDoubleShift and ComplexComplex
##
## Overwrites state.Q, state.Ct and state.B from state.POLY.
function QCB_factorization(state::ShiftType{T}) where {T}

    N, ps = state.N, state.POLY
    par = iseven(N) ? one(T) : -one(T)


    # real units for the real double-shift state, complex units otherwise
    # (plain locals: `const` is not meaningful on local variables)
    ZERO, ONE = isa(state, RealDoubleShift) ?
        (zero(T), one(T)) :
        (zero(Complex{T}), one(Complex{T}))



    Q, Ct, B = state.Q, state.Ct, state.B

    for ii = 1:(N-1)
        vals!(Q[ii], ZERO, ONE)
        idx!(Q[ii], ii)
    end
    vals!(Q[N], ONE, ZERO)
    idx!(Q[N], N)


    ## Working, but not quite what is in DFCC code
    ## there -par*ps[N], par*one(T); C is -conj(c), -s
    ## B[N] = par, -par...
    c, s, temp = givensrot(par * ps[N], -par * one(T))
    vals!(Ct[N], conj(c), -s); idx!(Ct[N], N)

    vals!(B[N], -par*s, par*conj(c))
    idx!(B[N], N)

    for ii in 2:N
        c, s, temp = givensrot(-ps[ii-1], temp)
        vals!(Ct[N-ii + 1], conj(c), -s)
        idx!(Ct[N-ii+1], N-ii+1)

        vals!(B[N-ii + 1], c, s)
        idx!(B[N-ii+1], N-ii+1)
    end

end
init_state(state::ShiftType{T}) where {T} = QCB_factorization(state)

# If there is an issue, this function can be used to restart the algorithm
# could be merged with init_state?
# Re-run the initial factorization, zero the stored eigenvalues and put all
# counters back to the values the state constructors use.
function restart(state::ShiftType{T}) where {T}
    # try again
    init_state(state)

    for i in 1:state.N
        state.REIGS[i] = zero(T)
        state.IEIGS[i] = zero(T)
    end

    ctrs = state.ctrs
    ctrs.zero_index = 0
    ctrs.start_index = 1
    ctrs.stop_index = state.N - 1
    ctrs.it_count = 0
    ctrs.tr = state.N - 2
end


### Related to decomposition QR into QC(B + ...)


## we need to find A[k:k+2, k:k+1] for purposes of computing eigenvalues, either
## to give the shifts or to find the roots after deflation.
##
## fill A[k:k+2, k:k+1] k in 2:N
## updates state.A
##
# We look for r_j,k. Depending on |j-k| there are different amounts of work
# we have wk = (B + e1 y^t) * ek = B * ek + e1 yk; we consider B* ek only B1 ... Bk D ek applies
#

# julia> Bi*Bj*Bk * [0,0,1,0]
# julia> rotm(bi1, bi2, 1, 4) * rotm(bj1, bj2, 2, 4) * rotm(bk1, bk2, 3, 4) * [0,0,1,0]
# 4-element Array{SymPy.Sym,1}
# ⎡    ___ ___ ⎤
# ⎢bk₁⋅bi₂⋅bj₂ ⎥
# ⎢            ⎥
# ⎢     ___ ___⎥
# ⎢-bk₁⋅bi₁⋅bj₂⎥
# ⎢            ⎥
# ⎢      ___   ⎥
# ⎢  bk₁⋅bj₁   ⎥
# ⎢            ⎥
# ⎣    bk₂     ⎦
# which gives [what, wj, wk, wl]

# For rkk, we have Ck * W = [rkk, 0]
# @vars ck1 ck2 what w1
# u = rotm(ck1, ck2, 1,2) * [what, w1]
# u[1](what => solve(u[2], what)[1]) |> simplify
#     ⎛    ___      2⎞
# -w₁⋅⎝ck₁⋅ck₁ + ck₂ ⎠
# ─────────────────────   this is rkk = -w1/ck_s
#          ck₂
#
# For r[k-1, k] we need to do more work. We need [what_{k-1}, w_k, w_{k+1}], where w_k, w_{k+1} found from B values as above.
153 | # 154 | # julia> @vars ck1 ck2 cj1 cj2 what w w1 155 | # (ck1, ck2, cj1, cj2, what, w, w1) 156 | 157 | # julia> u = rotm(ck1, ck2, 2, 3) * rotm(cj1, cj2, 1, 3) * [what, w, w1] # C^*_{k} * C^*{k-1} * W = [r_{k-1,k}, r_{k,k}, 0] 158 | # 3-element Array{SymPy.Sym,1} 159 | # ⎡ cj₁⋅ŵ - cj₂⋅w ⎤ 160 | # ⎢ ⎥ 161 | # ⎢ __ ⎥ 162 | # ⎢cj₂⋅ck₁⋅ŵ + ck₁⋅w⋅cj₁ - ck₂⋅w₁⎥ 163 | # ⎢ ⎥ 164 | # ⎢ ___ ___⎥ 165 | # ⎣cj₂⋅ck₂⋅ŵ + ck₂⋅w⋅cj₁ + w₁⋅ck₁⎦ 166 | 167 | 168 | 169 | # julia> u[1](what => solve(u[3], what)[1]) |> simplify 170 | # 2 171 | # cj₁ ⋅w cj₁⋅ck₁⋅w₁ 172 | # - ────── - ────────── - cj₂⋅w 173 | # cj₂ cj₂⋅ck₂ 174 | 175 | ## or -(w + cj * ck/sk * w1) / sj 176 | 177 | # 178 | # For r_{k-2,k} we need to reach back one more step 179 | # C^*_{k} * C^*{k-1} * C^*_{k-2} W = [r_{k-2,k} r_{k-1,k}, r_{k,k}, 0] 180 | # 181 | # julia> @vars ck1 ck2 cj1 cj2 ci1 ci2 what wm1 w w1 182 | # julia> u = rotm(ck1, ck2, 3, 4) * rotm(cj1, cj2, 2, 4) * rotm(ci1, ci2, 1, 4) * [what, wm1, w, w1] 183 | # julia> u[1](what => solve(u[4], what)[1]) |> simplify 184 | # 2 185 | # ci₁ ⋅wm₁ ci₁⋅cj₁⋅w ci₁⋅ck₁⋅w₁ 186 | # - ──────── - ───────── - ─────────── - ci₂⋅wm₁ 187 | # ci₂ ci₂⋅cj₂ ci₂⋅cj₂⋅ck₂ 188 | # 189 | # of -(wm1 + (ci*cj/sj)*w + (ci*ck) / (sj * sk) * w1) / si 190 | # 191 | # This will have problems if any of si, sj or sk are 0. This happens if the 192 | # Ct[k] become trivial. Theorem 4.1 ensures this can't happen mathematically 193 | # though numerically, this is a different matter. 
The bound involves 1/||p|| which can be smaller than machine precision for, say, Wilkinson(20)
#


# D values are only for ComplexRealSingleShift
getD(state::ComplexRealSingleShift, k::Int) = state.D[k]
getD(state::ShiftType{T}, k::Int) where {T} = one(T)

# Fill state.A[1:2, 1:2] with a 2x2 block of the factored matrix, using
# the rotators Q, Ct, B around index k (k-1 and k for k == 2; k-2, k-1 and k
# otherwise) and the D phases from getD. state.R is used as scratch space.
# Throws (via `error`) when k is outside 2:state.N. Always returns `false`.
function diagonal_block(state::ShiftType{T}, k) where {T}
    2 <= k <= state.N || error("$k not in [2,n]")

    A = state.A
    R = state.R # temp storage

    Q,Ct,B = state.Q, state.Ct, state.B

    if k == 2
        Bj_c, Bj_s = vals(B[k-1]); Bk_c, Bk_s = vals(B[k])
        Cj_c, Cj_s = vals(Ct[k-1]); Ck_c, Ck_s = vals(Ct[k])
        Qj_c, Qj_s = vals(Q[k-1]); Qk_c, Qk_s = vals(Q[k])


        # # here we only need [r11 r12; 0 r22]
        # k=2 this is r_kk, r_k-1,k

        # for k
        wl = Bk_s
        wk = conj(Bj_c) * Bk_c

        # rkk = -w_{k+1} / ck_s
        R[2,2] = - wl / Ck_s

        # r_{k-1,k} = -(wk + cj_c * conj(ck_c) / ck_s *wl)/cj_s
        R[1,2] = - (wk + Cj_c * conj(Ck_c) / Ck_s * wl) / Cj_s

        # for k - 1 we have (l=k)
        wl = Bj_s
        R[1,1] = - wl / Cj_s
        R[2,1] = zero(T)   # converted to eltype(R) on assignment

        alpha, beta = getD(state, k-1), getD(state, k)
        R[1,1] *= alpha; R[1,2] *= alpha
        R[2,1] *= beta; R[2,2] *= beta

        # 3×2 Array{SymPy.Sym,2}
        # ⎡                           ___⎤
        # ⎢R₁₁⋅qjc  R₁₂⋅qjc - R₂₂⋅qkc⋅qjs⎥
        # ⎢                              ⎥
        # ⎢                           ___⎥
        # ⎢R₁₁⋅qjs  R₁₂⋅qjs + R₂₂⋅qkc⋅qjc⎥
        # ⎢                              ⎥
        # ⎣   0            R₂₂⋅qks       ⎦

        A[1,1] = R[1,1] * Qj_c
        A[2,1] = R[1,1] * Qj_s
        A[1,2] = R[1,2] * Qj_c - R[2,2] * Qk_c * conj(Qj_s)
        A[2,2] = R[1,2] * Qj_s + R[2,2] * Qk_c * conj(Qj_c)

    else

        Bi_c, Bi_s = vals(B[k-2]); Bj_c, Bj_s = vals(B[k-1]); Bk_c, Bk_s = vals(B[k])
        Ci_c, Ci_s = vals(Ct[k-2]); Cj_c, Cj_s = vals(Ct[k-1]); Ck_c, Ck_s = vals(Ct[k])
        Qi_c, Qi_s = vals(Q[k-2]); Qj_c, Qj_s = vals(Q[k-1]); Qk_c, Qk_s = vals(Q[k])


        # for k
        wl = Bk_s
        wk = conj(Bj_c) * Bk_c
        wj = - conj(Bi_c) * conj(Bj_s) * Bk_c

        R[3,2] = - wl / Ck_s
        R[2,2] = - (wk + Cj_c * conj(Ck_c) / Ck_s * wl) / Cj_s

        # -(wj + ci_c * conj(cj_c) / cj_s * wk + ci_c * conj(ck_c) / (cj_s * ck_s) * wl)/ci_s
        R[1,2] = -(wj + Ci_c * conj(Cj_c) / Cj_s * wk +
                   Ci_c * conj(Ck_c) / (Cj_s * Ck_s) * wl) / Ci_s

        # downshift C indexes l->k; k->j; j->i; but keep w's (confusing)
        wl = Bj_s
        wk = conj(Bi_c) * Bj_c
        R[2,1] = - wl / Cj_s
        R[1,1] = - (wk + Ci_c * conj(Cj_c) / Cj_s * wl) / Ci_s
        R[3,1] = zero(T)

        alpha, beta, gamma = getD(state, k-2), getD(state, k-1), getD(state, k)
        R[1,1] *= alpha; R[1,2] *= alpha
        R[2,1] *= beta; R[2,2] *= beta
        R[3,1] *= gamma; R[3,2] *= gamma

        # make Qs from multiplying rotators
        # make Rs = [Sym("r$i$j") for i in 1:5, j in 1:5] |> triu
        # julia> (Qs * Rs)[2:4, 2:3] ## but indexing of r's is off! j-1 needed
        # 3×2 Array{SymPy.Sym,2}
        # ⎡                  ___                      ___               ___⎤
        # ⎢q1s⋅r₁₂ + q2c⋅r₂₂⋅q1c  q1s⋅r₁₃ + q2c⋅r₂₃⋅q1c - q2s⋅q3c⋅r₃₃⋅q1c⎥
        # ⎢                                                              ⎥
        # ⎢                                                  ___         ⎥
        # ⎢       q2s⋅r₂₂                  q2s⋅r₂₃ + q3c⋅r₃₃⋅q2c         ⎥
        # ⎢                                                              ⎥
        # ⎣          0                            q3s⋅r₃₃                ⎦
        # 2×2 Array{SymPy.Sym,2}
        # ⎡                  ___                      ___           ___ ___⎤
        # ⎢R₁₂⋅qis + R₂₂⋅qjc⋅qic  R₁₃⋅qis + R₂₃⋅qjc⋅qic - R₃₃⋅qkc⋅qic⋅qjs⎥
        # ⎢                                                              ⎥
        # ⎢                                                  ___         ⎥
        # ⎣       R₂₂⋅qjs                  R₂₃⋅qjs + R₃₃⋅qkc⋅qjc         ⎦


        A[1,1] = R[1,1] * Qi_s + R[2,1] * conj(Qi_c) * Qj_c
        A[2,1] = R[2,1] * Qj_s
        A[1,2] = R[1,2] * Qi_s + R[2,2] * conj(Qi_c) * Qj_c - R[3,2] * conj(Qi_c) * conj(Qj_s) * Qk_c
        A[2,2] = R[2,2] * Qj_s + R[3,2] * conj(Qj_c) * Qk_c

    end

    false
end

##################################################

# [a11 - l a12; a21 a22] -> l^2 -2 * (tr(A)/2) l + det(A)
# so we use b = tr(A)/2 for qdrtc routing
#
# Eigenvalues of the 2x2 block held in state.A. The (real, imaginary) parts
# returned by qdrtc are stored in state.e1 and state.e2, and the two
# eigenvalues are returned as a tuple of complex numbers.
function eigen_values(state::RealDoubleShift{T}) where {T}

    a11, a12 = state.A[1,1], state.A[1,2]
    a21, a22 = state.A[2,1], state.A[2,2]

    # divide by the integer 2 (not a Float64 literal) so b keeps type T
    b = (a11 + a22) / 2
    c = a11 * a22 - a12 * a21

    state.e1[1], state.e1[2], state.e2[1], state.e2[2] = qdrtc(one(T), b, c)
    complex(state.e1[1], state.e1[2]), complex(state.e2[1], state.e2[2])
end

# from `modified_quadratic.f90`
#
# Eigenvalues of the 2x2 block held in state.A from its trace and
# determinant. The root of larger modulus is formed first and the other is
# obtained as det / e1; both are zero when that larger root is zero.
# Results are stored as (re, imag) in state.e1, state.e2 and returned as a
# tuple of complex numbers.
function eigen_values(state::ShiftType{T}) where {T}

    a11, a12 = state.A[1,1], state.A[1,2]
    a21, a22 = state.A[2,1], state.A[2,2]

    tr = a11 + a22
    detm = a11 * a22 - a21 * a12
    # integer literals keep the element type of A (no promotion to Float64)
    disc = sqrt(tr * tr - 4 * detm)

    u = abs(tr + disc) > abs(tr - disc) ? tr + disc : tr - disc
    if iszero(u)
        state.e1[1], state.e1[2] = zero(T), zero(T)
        state.e2[1], state.e2[2] = zero(T), zero(T)
    else
        e1 = u / 2
        e2 = detm / e1
        state.e1[1], state.e1[2] = real(e1), imag(e1)
        state.e2[1], state.e2[2] = real(e2), imag(e2)
    end

    complex(state.e1[1], state.e1[2]), complex(state.e2[1], state.e2[2])
end


##################################################
## Deflation
## when a Q[k] matrix become a "D" matrix, we deflate. This is checked by the sine term being basically 0.
# Scan Q[k] for k from stop_index down to start_index and deflate at the first
# k whose sine term has absolute value <= tol. At most one deflation per call.
function check_deflation(state::ShiftType{T}, tol = eps(T)) where {T}
    for k in state.ctrs.stop_index:-1:state.ctrs.start_index
        if abs(vals(state.Q[k])[2]) <= tol
            deflate(state, k)
            return
        end
    end
end

# deflate a term
# turn on `show_status` to view sequence
function deflate(state::ShiftType{T}, k) where {T}

    # make a D matrix
    c, s = vals(state.Q[k])
    vals!(state.Q[k], c, zero(T)) # zero out s, will renormalize c

    # shift zero counter
    state.ctrs.zero_index = k      # points to a matrix Q[k] either RealRotator(-1, 0) or RealRotator(1, 0)
    state.ctrs.start_index = k + 1

    # reset counter
    state.ctrs.it_count = 1
end


# deflate a term
# deflation for ComplexReal is different, as
# we replace Qi with I and move diagonal part into D
function deflate(state::ComplexRealSingleShift{T}, k) where {T}

    # when we deflate here we want to leave Q[k] = I and
    # move Dk matrix over to merge with D
    # we do this by m
    # Qi Qi Dk Qi
    # Qj -> Dk Qj --> Dk Qj and so on until we get to start_index
    # Dk Ik Dk Ik Dk Ik
    #
    # then the Dk's are collected into [alpha 0; I; 0 conj(alpha)] (start,k+1)

    alpha = vals(state.Q[k])[1]   # only the cosine term is needed
    vals!(state.Q[k], one(Complex{T}), zero(T)) # I

    cascade(state.Q, state.D, alpha, k, state.ctrs.stop_index)

    # shift zero counter
    state.ctrs.zero_index = k      # points to Q[k], which was just set to the identity
    state.ctrs.start_index = k + 1

    # reset counter
    state.ctrs.it_count = 1
end


##################################################



--------------------------------------------------------------------------------
/src/bulge.jl:
--------------------------------------------------------------------------------
## Bulge chasing

## chase
## bulge from top to bottom until final absorption
# One bulge step: create_bulge, prepare_bulge, chase_bulge, absorb_bulge are
# called on `state` in that order; the value of absorb_bulge is returned.
function bulge_step{T}(state::ShiftType{T})
    create_bulge(state)
    prepare_bulge(state)
    chase_bulge(state)
    return absorb_bulge(state)
end



## RealDoubleShift
##
## There are two rotators, U, V, to chase through the matrix using the following operations
##                                  [         [
## a unitary transform: basically U   [  -->    [ U;  as we just hit both sides by U' A U and U' U is I
##                                  [         [
##
##
## A fuse  [ [  ->  [
##
## A turnover  [   M  -->    [    where M moves through a descending or ascending structure
##               [         M   [
##
## a "D" flip:  D      --->     D
##                [           [
##
## NOTE(review): the column alignment of these sketches was lost in the source
## dump and has been re-spaced by hand.


## The bulge is created by (A-rho1) * (A - rho2) * e_1 where rho1 and rho2 are eigenvalue or random
function create_bulge{T}(state::RealDoubleShift{T})

    if mod(state.ctrs.it_count, 15) == 0

        # every 15th iteration: random conjugate pair (cos t, +/- sin t)
        theta = rand() * pi
        co, si = cos(theta), sin(theta)
        top = state.ctrs.start_index

        vals!(state.U, co, si)
        idx!(state.U, top)
        vals!(state.Ut, co, -si)
        idx!(state.Ut, top)

        vals!(state.V, co, -si)
        idx!(state.V, top + 1)
        vals!(state.Vt, co, si)
        idx!(state.Vt, top + 1)

    else

        # compute (A-rho1) * (A - rho2) * e_1
        # find e1, e2: diagonal_block fills state.A, eigen_values fills state.e1, state.e2
        flag = diagonal_block(state, state.ctrs.stop_index + 1)
        eigen_values(state)
        rho1_re, rho1_im = state.e1
        rho2_re, rho2_im = state.e2

        # find first part of A[1:3, 1:2]
        Bk = state.Bk
        flag |= diagonal_block(state, state.ctrs.start_index + 1)

        a11, a12 = state.A[1,1], state.A[1,2]
        a21, a22 = state.A[2,1], state.A[2,2]

        # find last part
        flag |= diagonal_block(state, state.ctrs.start_index + 2)
        # Bk[3,2] = state.A[2, 1]
        a32 = state.A[2,1]

        # an issue... restart
        # if isnan(l1r) || isnan(l1i) || isnan(l2r) || isnan(l2i)
        #     ## eigvals gone awry
        #     restart(state)
        #     return create_bulge(state)
        # end

        # if !flag # flag is false if there is an issue
        #     restart(state)
        #     return create_bulge(state)
        # end

        # make first three elements of c1,c2,c3
        # c1 = real(-l1i⋅l2i + ⅈ⋅l1i⋅l2r - ⅈ⋅l1i⋅t₁₁ + ⅈ⋅l1r⋅l2i + l1r⋅l2r - l1r⋅t₁₁ - ⅈ⋅l2i⋅t₁₁ - l2r⋅t₁₁ + t₁₁^2 + t₁₂⋅t₂₁)
        # c2 = real(-ⅈ⋅l1i⋅t₂₁ - l1r⋅t₂₁ - ⅈ⋅l2i⋅t₂₁ - l2r⋅t₂₁ + t₁₁⋅t₂₁ + t₂₁⋅t₂₂)
        # c3 = real(t₂₁⋅t₃₂)

        x1 = -rho1_im * rho2_im + rho1_re*rho2_re - rho1_re*a11 - rho2_re * a11 + a11^2 + a12 * a21
        x2 = -rho1_re * a21 - rho2_re * a21 + a11 * a21 + a21 * a22
        x3 = a21 * a32

        # V comes from the rotation computed on (x2, x3) ...
        cv, sv, partial_norm = givensrot(x2, x3)
        below = state.ctrs.start_index + 1

        vals!(state.V, cv, -sv)
        idx!(state.V, below)

        # ... and U from the rotation computed on (x1, norm returned above)
        cu, su, full_norm = givensrot(x1, partial_norm)

        vals!(state.U, cu, -su)
        idx!(state.U, below - 1)
    end

end

## make W on left side
#
# initial Q0
# we do turnover U1'     Q1    -->  U1'         -->  U1'            -->   Q1
#                   V1'    Q2     Q1   V1' Q2      Q1   (V1'Q2)        W1    Q2
#
# With this, W will be kept on the left side until the last step, U,V
# move through left to right by one step, right to left by unitariness
#
# Q0                 Q0                   Q0                Q0
#    U1'     Q1         U1'  Q1*      ->     U1      -->       U1*
#       V1'    Q3  ->      V1' Q3        Q1**  V1' Q3        W    (V1'Q3)
#
# NOTE(review): column alignment of the sketches above was lost in the source
# dump and has been re-spaced by hand.
#
# Q0 is (p,0) rotator, p 1 or -1.
# We have
#  Q0      -->   Q0
#     R            (r, pr2)
function prepare_bulge{T}(state::RealDoubleShift{T})

    # N = state.N
    # as_full(state.V', N+1)* as_full(state.U', N+1)* full(state) * as_full(state.U, N+1) * as_full(state.V, N+1) |> eigvals |> println

    U, V = state.U, state.V
    Ut, Vt, W = state.Ut, state.Vt, state.W

    top = state.ctrs.start_index

    # Ut, Vt take the values of U, V with the sign of s flipped, at the same index
    vals!(Ut, U.c, -U.s)
    idx!(Ut, idx(U))
    vals!(Vt, V.c, -V.s)
    idx!(Vt, idx(V))

    # W starts as a copy of Q[top], flipped by the sign p taken from Q[top-1]
    copy!(W, state.Q[top])
    p = top == 1 ? one(T) : state.Q[top-1].c # zero index implies Q0 = RR(1,0) or RR(-1,0)
    dflip(W, p)

    turnover(Ut, Vt, W, Val{:right})
    fuse(Vt, state.Q[top+1], Val{:right}) # V' Q3
    dflip(Ut, p)
    vals!(state.Q[top], Ut.c, Ut.s)

end

## Bulge chasing moves U, V from R to L through B then C then Q where an interaction with W allows
## a subsequent unitary operation to move U,V back to the right, one step down
## The case when Ct[i] and B[i] are identical allows a speed up.

# Shortcut chase step for RealDoubleShift: only scratch copies of U, V are
# turned over through B, Ct is then rebuilt from B, and the real U, V are
# turned over through Q and finally with W.
function one_bulge_chase_shortcut{T}(state::RealDoubleShift{T})
    B, Ct, Q = state.B, state.Ct, state.Q
    i = idx(state.V)

    # borrow Vt, Ut here to store a copy
    copy!(state.Vt, state.V)
    copy!(state.Ut, state.U)

    turnover(B[i], B[i+1], state.Vt, Val{:right})
    turnover(B[i-1], B[i], state.Ut, Val{:right})

    # Ct[i-1:i+1] gets the values of B with the second one negated
    for offset in -1:1
        cosine, sine = vals(B[i+offset])
        vals!(Ct[i+offset], cosine, -sine) # using copy!(Ct, B') is slower
    end

    turnover(Q[i], Q[i+1], state.V, Val{:right})
    turnover(Q[i-1], Q[i], state.U, Val{:right})
    turnover(state.W, state.V, state.U, Val{:left})

end

# General chase step for RealDoubleShift: V then U are each turned over through
# B and Ct, then both through Q, and finally W, V, U are turned over together.
function one_bulge_chase{T}(state::RealDoubleShift{T})
    B, Ct, Q = state.B, state.Ct, state.Q
    U, V = state.U, state.V

    iv = idx(V)
    turnover(B[iv], B[iv+1], V, Val{:right})
    turnover(Ct[iv+1], Ct[iv], V, Val{:right})

    iu = iv - 1
    turnover(B[iu], B[iu+1], U, Val{:right})
    turnover(Ct[iu+1], Ct[iu], U, Val{:right})

    turnover(Q[iv], Q[iv+1], V, Val{:right})
    turnover(Q[iu], Q[iu+1], U, Val{:right})
    turnover(state.W, V, U, Val{:left})

end


# Repeat the chase step once for every position from idx(V) up to stop_index - 1.
function chase_bulge{T}(state::RealDoubleShift{T})

    # println(" begin chase at level $(state.V.i)")
    # as_full(state.W, state.N+1)* full(state) * as_full(state.V, state.N+1) * as_full(state.U, state.N+1) |> eigvals |> println

    ## When i < tr C_i = B_i. This happens in the early steps
    ## this means fewer turnovers, but at a price of more allocations
    for i in idx(state.V):(state.ctrs.stop_index - 1) # loops from start_index to stop_index - 1
        if i <= state.ctrs.tr
            one_bulge_chase_shortcut(state)
        else
            one_bulge_chase(state)
        end
    end

    # println("end chase")
    # as_full(state.W, state.N+1)* full(state) * as_full(state.V, state.N+1) * as_full(state.U, state.N+1) |> eigvals |> println
end


## Bulge is absorbed by moving V through, then U going through two trips.
function absorb_bulge{T}(state::RealDoubleShift{T})
    B, Ct, Q = state.B, state.Ct, state.Q
    U, V = state.U, state.V

    # println("absorb 0")
    # as_full(state.W, state.N+1) * full(state) * as_full(state.V, state.N+1) * as_full(state.U, state.N+1) |> eigvals |> println


    # first V goes through B, C then fuses with Q
    iv = idx(V)

    turnover(B[iv], B[iv+1], V, Val{:right})
    turnover(Ct[iv+1], Ct[iv], V, Val{:right})

    ## We may be fusing Q               P     -->  (Q')
    #                     RR(-1,0)                      RR(-1,0)
    #

    dflip(V, getd(Q[iv+1]))
    fuse(Q[iv], V, Val{:left}) # fuse Q*V -> Q


    # println("absorb 1")
    # as_full(state.W, state.N+1) * full(state) * as_full(state.U, state.N+1) |> eigvals |> println


    # Then bring U through B, C, and Q to fuse with W
    ju = idx(U)
    turnover(B[ju], B[ju+1], U)
    turnover(Ct[ju+1], Ct[ju], U)
    turnover(Q[ju], Q[ju+1], U)
    fuse(state.W, U, Val{:right})

    # println("absorb 2")
    # as_full(state.U, state.N+1) * full(state) |> eigvals |> println

    # similarity transformation, bring through then fuse with Q
    ju = idx(U)
    turnover(B[ju], B[ju+1], U, Val{:right})
    turnover(Ct[ju+1], Ct[ju], U)
    dflip(U, getd(Q[ju+1]))
    fuse(Q[ju], U, Val{:left})

    # println("absorb final")
    # full(state) |> eigvals |> println
end



##################################################
## ComplexRealSingleShift

# Pick a shift (random every 15th iteration, otherwise taken from the trailing
# 2x2 block), then set U and Ut from the rotation computed on
# (A[1,1] - shift, A[2,1]) of the leading block. Returns nothing.
function create_bulge{T}(state::ComplexRealSingleShift{T})

    if mod(state.ctrs.it_count, 15) == 0

        theta = rand() * pi
        shift = state.ray ? complex(cos(theta), sin(theta)) : complex(cos(theta), zero(T))

    else

        diagonal_block(state, state.ctrs.stop_index + 1)
        if state.ray
            # of the two eigenvalues, use the one closer to A[2,2]
            e1, e2 = eigen_values(state)
            shift = norm(state.A[2,2] - e1) < norm(state.A[2,2] - e2) ? e1 : e2
        else
            shift = state.A[2,2]
        end

    end

    diagonal_block(state, state.ctrs.start_index + 1)
    cosine, sine, nrm = givensrot(state.A[1,1] - shift, state.A[2,1])

    vals!(state.U, conj(cosine), -sine) # U is the inverse of what we just found,
    idx!(state.U, state.ctrs.start_index)

    vals!(state.Ut, cosine, sine)
    idx!(state.Ut, idx(state.U))
    return nothing
end


##
##  U  Qi         (fUQi)      Di  (fUQi)          Di (fUQi)
##       Qj   -->        Qj  -->        Qj Dj  -->         Qj   D
##         Qk              Qk             Qk                 Qk Dk
## NOTE(review): column alignment of this sketch was lost in the source dump.
function prepare_bulge{T}(state::ComplexRealSingleShift{T})
    at = idx(state.Ut)

    # when deflating here we ensure Q[i-1] is an identity matrix
    # so no dflip

    # the phase returned by the fuse is handed to cascade together with Q and D
    phase = fuse(state.Ut, state.Q[at], Val{:right})
    cascade(state.Q, state.D, phase, at, state.ctrs.stop_index)

end

##
function one_bulge_chase_shortcut{T}(state::ComplexRealSingleShift{T})
    ## savings are one fewer turnover, a few copies
    B, Ct, D, Dp = state.B, state.Ct, state.D, state.Dp
    i = idx(state.U)

    # a scratch copy of U goes through B; Ct is then rebuilt from B
    copy!(state.Ut, state.U)
    turnover(B[i], B[i+1], state.Ut, Val{:right})
    for offset in 0:1
        a, b = vals(B[i+offset])
        vals!(Ct[i+offset], conj(a), -b) # using copy!(Ct, B') is slower
    end

    i = idx(state.U)
    # passthrough(view(state.D,i:(i+1)), state.U)
    Dp[1], Dp[2] = D[i], D[i+1]
    passthrough(Dp, state.U)
    D[i], D[i+1] = Dp[1], Dp[2]

    turnover(state.Q[i], state.Q[i+1], state.U, Val{:right})

end

# Moving QCBU_i -> QC(BUi) -> QCUi1B -> Q(CUi1)B -> QUiCB -> Ui1 Q C B
function one_bulge_chase{T}(state::ComplexRealSingleShift{T})
    # can consolidate first turnovers
    B, Ct, D, Dp = state.B, state.Ct, state.D, state.Dp
    U = state.U

    i = idx(U)
    turnover(B[i], B[i+1], U, Val{:right})
    turnover(Ct[i+1], Ct[i], U, Val{:right})

    # passthrough(view(state.D,i:(i+1)), state.U) #allocates
    Dp[1], Dp[2] = D[i], D[i+1]
    passthrough(Dp, U)
    D[i], D[i+1] = Dp[1], Dp[2]

    turnover(state.Q[i], state.Q[i+1], U, Val{:right})
end

# Q Ct B D U -> Q Ct B (D U) -> Q Ct (B U) D -> Q (Ct U) B D ->
# (Q U) Ct B D -> U Q Ct B D then wrap via unitary operation
function chase_bulge{T}(state::ComplexRealSingleShift{T})

    ## When i < tr C_i = B_i. This happens in the early steps
    ## this means fewer turnovers, but at a price of more allocations
    for i in idx(state.U):(state.ctrs.stop_index - 1) # loops from start_index to stop_index - 1
        if i <= state.ctrs.tr
            one_bulge_chase_shortcut(state)
        else
            one_bulge_chase(state)
        end
    end

end

##
## pass through Ct*B with two turnovers
## pass through D
## fuse Q & U
## absorb phase into D
function absorb_bulge{T}(state::ComplexRealSingleShift{T})
    B, Ct, D, Dp = state.B, state.Ct, state.D, state.Dp
    U = state.U
    i = idx(U)

    turnover(B[i], B[i+1], U, Val{:right})
    turnover(Ct[i+1], Ct[i], U, Val{:right})


    # passthrough(view(state.D, i:(i+1)), state.U)
    Dp[1], Dp[2] = D[i], D[i+1]
    passthrough(Dp, U)
    D[i], D[i+1] = Dp[1], Dp[2]


    # fuse and then take care of new alpha by moving into state.D
    phase = fuse(state.Q[i], U, Val{:left})

    D[i] *= phase
    D[i+1] *= conj(phase)

end

##################################################


##################################################

## ComplexComplexSingleShift
# Same shift selection and rotator setup as the ComplexRealSingleShift method:
# random shift every 15th iteration, otherwise a shift from the trailing 2x2
# block; U and Ut come from the rotation on (A[1,1] - shift, A[2,1]).
function create_bulge{T}(state::ComplexComplexSingleShift{T})

    if mod(state.ctrs.it_count, 15) == 0

        theta = rand() * pi
        shift = state.ray ? complex(cos(theta), sin(theta)) : complex(cos(theta), zero(T))

    else

        diagonal_block(state, state.ctrs.stop_index + 1)
        if state.ray
            # of the two eigenvalues, use the one closer to A[2,2]
            e1, e2 = eigen_values(state)
            shift = norm(state.A[2,2] - e1) < norm(state.A[2,2] - e2) ? e1 : e2
        else
            shift = state.A[2,2]
        end

    end

    diagonal_block(state, state.ctrs.start_index + 1)
    cosine, sine, nrm = givensrot(state.A[1,1] - shift, state.A[2,1])

    vals!(state.U, conj(cosine), -sine) # U is the inverse of what we just found,
    idx!(state.U, state.ctrs.start_index)
    vals!(state.Ut, cosine, sine)
    idx!(state.Ut, idx(state.U))
    return nothing
end



##
##  D            D            D
##    U' Q  -->    V Q  -->     (VQ)
##
## with D = D(alpha); U' = (u1, v1); V = (u1 conj(alpha), v1)
function prepare_bulge{T}(state::ComplexComplexSingleShift{T})
    at = idx(state.Ut)

    # if previously deflated, the prior is only diagonal
    # so may have trouble passing Ut to Q[i]
    at > 1 && Dflip(state.Ut, state.Q[at-1])

    fuse(state.Ut, state.Q[at], Val{:right})
end

##
function one_bulge_chase_shortcut{T}(state::ComplexComplexSingleShift{T})
    ## XXX speed up goes here, as we only need turnover through B, not C
    ## savings are one fewer turnover, a few copies
    return one_bulge_chase(state)
end
# Moving QCBU_i -> QC(BUi) -> QCUi1B -> Q(CUi1)B -> QUiCB -> Ui1 Q C B
function one_bulge_chase{T}(state::ComplexComplexSingleShift{T})
    U = state.U
    i = idx(U)
    turnover(state.B[i], state.B[i+1], U, Val{:right})
    turnover(state.Ct[i+1], state.Ct[i], U, Val{:right})
    turnover(state.Q[i], state.Q[i+1], U, Val{:right})
end

# Q Ct B D U -> Q Ct B (D U) -> Q Ct (B U) D -> Q (Ct U) B D ->
# (Q U) Ct B D -> U Q Ct B D then wrap via unitary operation
function chase_bulge{T}(state::ComplexComplexSingleShift{T})

    ## When i < tr C_i = B_i. This happens in the early steps
    ## this means fewer turnovers, but at a price of more allocations
    for i in idx(state.U):(state.ctrs.stop_index - 1) # loops from start_index to stop_index - 1
        if i <= state.ctrs.tr
            one_bulge_chase_shortcut(state)
        else
            one_bulge_chase(state)
        end
    end

end

# We have Q Ct B D U -> Q Ct B (U D) -> Q Ct (B U) D -> Q (Ct U) B D ->
# (Q U) Ct B D -> Q Di Ct B D -> Q (Di Ct) B D -> Q Ct (Di B) D -> Q Ct B (Di D)
# Q CB(Ui) -> QUi C B -> (QUi
# I
function absorb_bulge{T}(state::ComplexComplexSingleShift{T})
    U = state.U
    i = idx(U)

    turnover(state.B[i], state.B[i+1], U, Val{:right})
    turnover(state.Ct[i+1], state.Ct[i], U, Val{:right})

    # the Dflip against Q[i+1] is skipped when i has reached state.N
    if i < state.N
        Dflip(U, state.Q[i+1])
    end
    fuse(state.Q[i], U, Val{:left})
end

##################################################

--------------------------------------------------------------------------------