├── .DS_Store ├── complex_torch_var.py ├── default_params.py ├── localized_noise.py ├── mnist.py ├── noisy_fidelity.py ├── noisy_prob.py ├── noisy_test.py ├── optical_nn.py ├── plot_accuracies.py ├── results ├── .DS_Store ├── fft_fid.npy ├── fft_net_diff_depth │ ├── 1.npy │ ├── 2.npy │ ├── 3.npy │ └── 4.npy ├── localized_noise │ ├── U.npy │ ├── U_fft.npy │ ├── U_ord.npy │ ├── V.npy │ ├── V_fft.npy │ └── V_ord.npy ├── noisy_test │ ├── FFTNet_diag.npy │ ├── FFTNet_psbs.npy │ ├── GridNet_diag.npy │ ├── GridNet_ordered_SV_diag.npy │ ├── GridNet_psbs.npy │ ├── fft_net_diff_nh │ │ ├── 1024.npy │ │ ├── 256.npy │ │ └── 4096.npy │ ├── grid_1_layer.npy │ ├── stacked_fft_1.npy │ └── stacked_fft_diag.npy ├── stacked_fft_32_fid.npy ├── trunc_fid.npy └── unitary_fidelity.npy ├── train_mnist.py ├── trained_models ├── .DS_Store ├── GridUnitary_256.pth ├── _.pth ├── cgrd.pth ├── complex_1_layer.pth ├── complex_net.pth ├── complex_net │ ├── 1550179045 │ ├── 1550179474 │ ├── 1550179545 │ ├── 1550179616 │ ├── 1550179687 │ ├── 1550179759 │ ├── 1550179830 │ ├── 1550179902 │ ├── 1550179973 │ ├── 1550180044 │ └── 1550180115 ├── fft_net.pth ├── fft_net_16384_.pth ├── fft_net_diff_depth │ ├── 1 │ ├── 2 │ ├── 3 │ ├── 4 │ └── 5 ├── fft_net_diff_nh │ ├── 256 │ ├── 1024 │ └── 4096 ├── fft_net_nh ├── grid_1_layer.pth ├── grid_net.pth ├── grid_net │ ├── 1550179045_grid │ ├── 1550179474_grid │ ├── 1550179545_grid │ ├── 1550179616_grid │ ├── 1550179687_grid │ ├── 1550179759_grid │ ├── 1550179830_grid │ ├── 1550179902_grid │ ├── 1550179973_grid │ ├── 1550180044_grid │ └── 1550180115_grid ├── grid_net_ordered_SV.pth ├── grid_ord_net │ ├── 1550179045 │ ├── 1550179474 │ ├── 1550179545 │ ├── 1550179616 │ ├── 1550179687 │ ├── 1550179759 │ ├── 1550179830 │ ├── 1550179902 │ ├── 1550179973 │ ├── 1550180044 │ └── 1550180115 ├── stacked_fft_1.pth ├── stacked_fft_32.pth └── truncated_grid.pth └── unitary_decomp.py /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mike-fang/imprecise_optical_neural_network/2ed452fb4b6fd6105ff45bd6a60483f940246f09/.DS_Store -------------------------------------------------------------------------------- /complex_torch_var.py: -------------------------------------------------------------------------------- 1 | import torch as th 2 | import torch.nn as nn 3 | import numpy as np 4 | import torch.nn.functional as F 5 | from torch.autograd import Variable 6 | from torch.nn.parameter import Parameter 7 | from scipy.stats import unitary_group 8 | 9 | def rand_unitary(N): 10 | """ 11 | Returns a N x N randomly sampled unitary matrix represented as th.tensor 12 | """ 13 | U = unitary_group.rvs(N) 14 | U_real = th.tensor(U.real) 15 | U_imag = th.tensor(U.imag) 16 | return make_complex_matrix(U_real, U_imag) 17 | def make_complex_matrix(real, imag=None): 18 | """ 19 | Creates a real representation of complex matrix of the form 20 | (real, -imag) 21 | (imag, real) 22 | Input: 23 | real: (N x M) th.tensor, real component of matrix 24 | imag: (N x M) th.tensor, imag component of matrix. If none given, assumed to be zeros. 25 | Returns: 26 | A (2N x 2M) th.tensor, a real representation of complex matrix 27 | """ 28 | 29 | if imag is None: 30 | imag = real * 0 31 | # 2D matrix 32 | assert len(real.shape) == 2 33 | Z_upper = th.cat((real, -imag), dim=1) 34 | Z_lower = th.cat((imag, real), dim=1) 35 | Z = th.cat((Z_upper, Z_lower), dim=0) 36 | return Z.float() 37 | def make_batched_vec(real, imag=None): 38 | """ 39 | Represent complex input of shape (N, D) as real th.tensor. N is the batch size and D is the dimension 40 | """ 41 | if imag is None: 42 | imag = real * 0 43 | # 2D batched vectors 44 | assert len(real.shape) == 2 45 | Z = th.cat((real, imag), dim=1) 46 | return Z 47 | def norm_squared(Z): 48 | N, D_2 = Z.shape 49 | #assert D_2 % 2 == 0 50 | D = D_2//2 51 | real = Z[:, :D] 52 | imag = Z[:, D:] 53 | return (real**2 + imag**2) 54 | def print_complex_mat(Z, prec=None, **kwarg): 55 | np.set_printoptions(precision=prec, **kwarg) 56 | N, M = Z.shape 57 | assert (N % 2 == 0) and (M % 2 == 0) 58 | N = N//2 59 | M = M//2 60 | real = Z[:N, :M] 61 | imag = -Z[:N, M:] 62 | if (isinstance(Z, np.ndarray)): 63 | Z = real + imag * 1j 64 | else: 65 | Z = real.data.numpy() + imag.data.numpy() * 1j 66 | print("Complex Tensor: \n" + str(Z)) 67 | def print_complex_vec(Z): 68 | _, N = Z.shape 69 | assert (N % 2 == 0) 70 | N = N//2 71 | real = Z[:, :N] 72 | imag = -Z[:, N:] 73 | 74 | if (isinstance(Z, th.Tensor)): 75 | Z = real.numpy() + imag.numpy() * 1j 76 | else: 77 | Z = real.data.numpy() + imag.data.numpy() * 1j 78 | print("Complex Tensor: \n" + str(Z)) 79 | def complex_torch_to_numpy(X): 80 | X = X.data.numpy() 81 | 82 | N, M = X.shape 83 | assert (N % 2 == 0) and (M % 2 ==0) 84 | real = X[:N//2, :M//2] 85 | imag = X[N//2:, :M//2] 86 | return np.matrix(real + 1j * imag) 87 | 88 | class ComplexNorm(nn.Module): 89 | def __init__(self): 90 | super().__init__() 91 | def forward(self, X): 92 | return norm_squared(X) 93 | 94 | class ComplexLinear(nn.Module): 95 | def __init__(self, D_in, D_out, sigma=0, has_bias=False): 96 | super().__init__() 97 | self.D_in = D_in 98 | self.D_out = D_out 99 | self.has_bias = has_bias 100 | self.init_params() 101 | self.sigma = sigma 102 | def init_params(self): 103 | U = rand_unitary(self.D_out) 104 | S = th.zeros(self.D_out, self.D_in) 105 | VH = rand_unitary(self.D_in) 106 | 107 | sigma = 1/(self.D_in + self.D_out) ** 0.5 108 | if self.D_out < self.D_in: 109 | diag = th.randn(self.D_out) * sigma 110 | S[:, :self.D_out] = th.diag(diag) 111 | else: 112 | diag = th.randn(self.D_in) * sigma 113 | S[:self.D_in, :] = th.diag(diag) 114 | S = make_complex_matrix(S) 115 | 116 | M = (U@S@VH) 117 | self.M_real = Parameter(M[:self.D_out, :self.D_in]) 118 | self.M_imag = Parameter(M[self.D_out:, :self.D_in]) 119 | 120 | if self.has_bias: 121 | self.bias = Parameter(th.Tensor(D_out*2)) 122 | self.bias.data.uniform_(-sigma, sigma) 123 | else: 124 | self.register_parameter('bias', None) 125 | @property 126 | def weight(self): 127 | return make_complex_matrix(self.M_real, self.M_imag) 128 | def set_weight(self, M): 129 | self.M_real.data = M[:self.D_out, :self.D_in] 130 | self.M_imag.data = M[self.D_out:, :self.D_in] 131 | def forward(self, X): 132 | if self.sigma > 0: 133 | device = th.device('cuda' if self.weight.is_cuda else 'cpu') 134 | noise = th.zeros_like(self.weight).to(device) 135 | noise.normal_() 136 | weight = self.weight + noise * self.sigma 137 | else: 138 | weight = self.weight 139 | 140 | return F.linear(X, weight, self.bias) 141 | def get_M(self, numpy=True): 142 | U = self(th.eye(self.D_in * 2)).data.t() 143 | U_re = U[:self.D_out, :self.D_in] 144 | U_im = U[self.D_out:, :self.D_in] 145 | if numpy: 146 | return np.matrix(U_re) + 1j * np.matrix(U_im) 147 | else: 148 | return U 149 | 150 | 151 | if __name__ == '__main__': 152 | D_in, D_out = 3, 4 153 | net = ComplexLinear(D_in, D_out, 1e-2) 154 | X = th.randn(1, D_in) 155 | X = make_batched_vec(X) 156 | print(net(X)) 157 | print(net(X)) 158 | print(net(X)) 159 | print(net(X)) 160 | -------------------------------------------------------------------------------- /default_params.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | # Default file paths 4 | DIR_PATH = os.path.dirname(os.path.realpath(__file__)) 5 | 6 | 7 | # Trained Models 8 | DIR_TRAINED_MODELS = os.path.join(DIR_PATH, 'trained_models') 9 | F_COMPLEX_TRAIN = os.path.join(DIR_TRAINED_MODELS, 'complex_net.pth') 10 | F_CGRD_TRAIN = os.path.join(DIR_TRAINED_MODELS, 'cgrd.pth') 11 | F_GRID_TRAIN = os.path.join(DIR_TRAINED_MODELS, 'grid_net.pth') 12 | F_GRID_ORD_TRAIN = os.path.join(DIR_TRAINED_MODELS, 'grid_net_ordered_SV.pth') 13 | F_FFT_TRAIN = os.path.join(DIR_TRAINED_MODELS, 'fft_net.pth') 14 | 15 | DIR_COMPLEX_TRAIN = os.path.join(DIR_TRAINED_MODELS, 'complex_net') 16 | DIR_GRID_TRAIN = os.path.join(DIR_TRAINED_MODELS, 'grid_net') 17 | DIR_GRID_ORD_TRAIN = os.path.join(DIR_TRAINED_MODELS, 'grid_ord_net') 18 | DIR_FFT_TRAIN = os.path.join(DIR_TRAINED_MODELS, 'fft_net') 19 | DIR_FFT_NH_TRAIN = os.path.join(DIR_TRAINED_MODELS, 'fft_net_diff_nh') 20 | DIR_FFT_DEPTH_TRAIN = os.path.join(DIR_TRAINED_MODELS, 'fft_net_diff_depth') 21 | 22 | # Good learning rates for different networks 23 | LR_FFT = 5e-2 24 | LR_GRID = 2.5e-4 25 | LR_COMPLEX = 5e-3 26 | 27 | # Noisy Test Acc 28 | DIR_NOISY_TEST = os.path.join(DIR_PATH, 'results', 'noisy_test') 29 | F_GRID_ACC_DIAG = os.path.join(DIR_NOISY_TEST, 'GridNet_diag.npy') 30 | F_GRID_ORD_ACC_DIAG = os.path.join(DIR_NOISY_TEST, 'GridNet_ordered_SV_diag.npy') 31 | F_FFT_ACC_DIAG = os.path.join(DIR_NOISY_TEST, 'FFTNet_diag.npy') 32 | F_GRID_ACC_PSBS = os.path.join(DIR_NOISY_TEST, 'GridNet_psbs.npy') 33 | F_GRID_ORD_ACC_PSBS = os.path.join(DIR_NOISY_TEST, 'GridNet_ordered_SV_psbs.npy') 34 | F_FFT_ACC_PSBS = os.path.join(DIR_NOISY_TEST, 'FFTNet_psbs.npy') 35 | 36 | 37 | NOISY_TEST_MAX = 0.02 38 | NOISY_TEST_SIGMAS = np.linspace(0, NOISY_TEST_MAX, 21).tolist() 39 | NOISY_TEST_TRIALS = 20 40 | 41 | # Plotting outputs 42 | DIR_FIGS = os.path.join(DIR_PATH, 'figures') 43 | F_FIG_GRID_PSBS = os.path.join(DIR_FIGS, 'grid_noisy_matrix.pdf') 44 | F_FIG_FFT_PSBS = os.path.join(DIR_FIGS, 'fft_noisy_matrix.pdf') 45 | F_FIG_COMPARE_GRID_FFT = os.path.join(DIR_FIGS, 'grid_vs_fft.pdf') 46 | F_FIG_RAND_VS_ORD = os.path.join(DIR_FIGS, 'rand_vs_ordered.pdf') 47 | F_FIG_FFT_DIFF_NH = os.path.join(DIR_FIGS, 'fft_diff_nh.pdf') 48 | 49 | # Localized Noise 50 | DIR_RESULTS = os.path.join(DIR_PATH, 'results') 51 | DIR_LOC_NOISE = os.path.join(DIR_PATH, 'results', 'localized_noise') 52 | F_LN_U_RAND = os.path.join(DIR_LOC_NOISE, 'U.npy') 53 | F_LN_V_RAND = os.path.join(DIR_LOC_NOISE, 'V.npy') 54 | F_LN_U_ORD = os.path.join(DIR_LOC_NOISE, 'U_ord.npy') 55 | F_LN_V_ORD = os.path.join(DIR_LOC_NOISE, 'V_ord.npy') 56 | F_LN_U_FFT = os.path.join(DIR_LOC_NOISE, 'U_fft.npy') 57 | F_LN_V_FFT = os.path.join(DIR_LOC_NOISE, 'V_fft.npy') 58 | 59 | F_FIG_LN_ORD = os.path.join(DIR_PATH, 'figures', 'loc_noise_ord.pdf') 60 | F_FIG_LN_RAND = os.path.join(DIR_PATH, 'figures', 'loc_noise_rand.pdf') 61 | F_FIG_LN_FFT = os.path.join(DIR_PATH, 'figures', 'loc_noise_fft.pdf') 62 | 63 | # Noisy Prob 64 | DIR_NOISY_PROB = os.path.join(DIR_RESULTS, 'noisy_prob') 65 | -------------------------------------------------------------------------------- /localized_noise.py: -------------------------------------------------------------------------------- 1 | import matplotlib 2 | matplotlib.use("TkAgg") 3 | from mnist import * 4 | from optical_nn import * 5 | from train_mnist import * 6 | import complex_torch_var as ct 7 | import numpy as np 8 | import torch as th 9 | from torch.nn.parameter import Parameter 10 | import matplotlib.pyplot as plt 11 | from mpl_toolkits.axes_grid1 import make_axes_locatable 12 | from mnist import * 13 | from torch import optim 14 | from glob import iglob 15 | from scipy import optimize 16 | import os 17 | import matplotlib.gridspec as gridspec 18 | from default_params import * 19 | LAYER = 2 20 | 21 | def load_model(use_fft): 22 | if not use_fft: 23 | net = optical_net() 24 | f_path = './nl_optical_256.pth' 25 | else: 26 | net = fft_net() 27 | f_path = './fft_mnist/nl_train.pth' 28 | 29 | net.load_state_dict(th.load(f_path, map_location=DEVICE)) 30 | return net 31 | def grid_phase_mat(net, checkered=False): 32 | for n, p in net.named_parameters(): 33 | if n == 'theta_A': 34 | phase = p.data.cpu().numpy() 35 | nLA, nDA = phase.shape 36 | theta_a = ((phase + np.pi) % (np.pi * 2)) - np.pi 37 | if n == 'theta_B': 38 | phase = p.data.cpu().numpy() 39 | nLB, nDB = phase.shape 40 | theta_b = np.zeros((nLB, nDA), dtype=theta_a.dtype) 41 | theta_b[:, :nDB] = ((phase + np.pi) % (np.pi * 2)) - np.pi 42 | 43 | theta = np.zeros((nLA*2, nDA*2), dtype=theta_a.dtype) 44 | theta[0::2, 0::2] = theta_a 45 | theta[1::2, 1::2] = theta_b 46 | if not checkered: 47 | theta[0::2, 1::2] = theta_a 48 | theta[1::2, 2::2] = theta_b[:, :-1] 49 | return theta 50 | def central_band_std(refls, delta=10): 51 | horz_cut_refls = (refls[:, 128-delta:128+delta:2, :]).copy() 52 | horz_cut_refls += (refls[:, 128-delta+1:128+delta:2, :]) 53 | horz_cut_refls = horz_cut_refls.reshape((-1, horz_cut_refls.shape[-1])) 54 | 55 | horz_cut_refl_std = np.std(horz_cut_refls, axis=0) 56 | return horz_cut_refl_std[:-1] 57 | plt.title(r'Distribution of Internal Phaseshift ($\theta_{m, l}$)') 58 | plt.plot(np.arange(256-1), horz_cut_relf_std[:-1]) 59 | def get_theta_refl(f_list): 60 | refls = [] 61 | thetas = [] 62 | for f in f_list: 63 | net.load_state_dict(th.load(f, map_location=DEVICE)) 64 | theta = grid_phase_mat(net[2].U, checkered=False) 65 | refl = np.sin(theta/2) 66 | 67 | refls.append(refl) 68 | thetas.append(theta) 69 | 70 | return np.array(thetas), np.array(refls) 71 | def plot_MZIs(refl, name=False, full=True): 72 | fig, ax = plt.subplots() 73 | im = ax.imshow(refl, vmin=.0, vmax=np.pi/2, cmap='inferno') 74 | if full: 75 | plt.title(r'Distribution of MZI phase in $U_2$', fontsize=14) 76 | plt.xlabel('Layer depth (l)', fontsize=14) 77 | plt.ylabel('Dimension (d)', fontsize=14) 78 | divider = make_axes_locatable(ax) 79 | cax = divider.append_axes('right', size='5%', pad=0.05) 80 | cbar = fig.colorbar(im, cax=cax, ticks=[0, 0.25 * np.pi, 0.5 * np.pi]) 81 | cbar.ax.set_yticklabels( 82 | ['0', r'$\pi/4$', r'$\geq\pi/2$'] 83 | ) 84 | else: 85 | ax.set_yticks(np.arange(refl.shape[0]), minor=True) 86 | ax.set_yticks(np.arange(refl.shape[0], 5), minor=False) 87 | ax.set_xticks(np.arange(refl.shape[0]), minor=True) 88 | ax.set_xticks(np.arange(refl.shape[0], 5), minor=False) 89 | 90 | ax.set_xticklabels(ax.xaxis.get_majorticklabels()) 91 | 92 | ax.grid(which='minor', linestyle='-', linewidth=0.5, color='white', alpha=0.4) 93 | if full: 94 | fig.savefig(f'./grid_phase_pos.pdf', dpi=400) 95 | else: 96 | fig.savefig(f'./grid_phase_pos_{name}.pdf', dpi=400) 97 | def make_hist(): 98 | N_BINS = 50 99 | for n, idx in regions.items(): 100 | if n == 'center': 101 | c = 'red' 102 | elif n == 'edge': 103 | c = 'lime' 104 | else: 105 | c = 'blue' 106 | thetas_crop = thetas[:, idx[0], idx[1]] 107 | thetas_flat = thetas_crop.flatten() 108 | plt.hist(thetas_flat, bins=N_BINS, range=(0, np.pi/2), density=True, color=c, alpha=.7) 109 | plt.xticks( 110 | [x * np.pi for x in [0, 1/8, 1/4, 3/8, 1/2]], 111 | [ r'0', r'', r'$\pi/4$', r'', r'$\pi/2$', ] 112 | ) 113 | plt.xlabel(r'Internal Phase Shift ($\theta$)', fontsize=14) 114 | plt.ylabel(r'Normalized Frequency', fontsize=14) 115 | plt.title(r'Distribution of Phase Shift in $U_2$ of GridNet', fontsize=14) 116 | plt.savefig(f'./grid_phase_hist.pdf') 117 | def plot_local_sensitivity(U_mat, V_mat, trans, f_out=None, color_scale=1): 118 | s = color_scale 119 | vmax = max(U_mat.max(), V_mat.max()) 120 | vmin = min(U_mat.min(), V_mat.min()) 121 | if vmax > -vmin: 122 | vmax, vmin = s*vmax, -s*vmax 123 | else: 124 | vmax, vmin = -s*vmin, s*vmin 125 | 126 | #Make subplot grid 127 | plt.figure(figsize=(17, 5)) 128 | gs = gridspec.GridSpec(1, 4, width_ratios=[3, .8, 3, .3]) 129 | VH_ax = plt.subplot(gs[0, 0]) 130 | U_ax = plt.subplot(gs[0, 2]) 131 | S_ax = plt.subplot(gs[0, 1]) 132 | cbar_ax = plt.subplot(gs[0, 3]) 133 | 134 | cmap = 'PiYG' 135 | U_im = U_ax.imshow(U_mat.T, cmap=cmap, vmin=vmin, vmax=vmax) 136 | VH_im = VH_ax.imshow(V_mat.T, cmap=cmap, vmin=vmin, vmax=vmax) 137 | S_ax.plot(trans, np.arange(256), 'k') 138 | S_ax.set_ylim([0, 255]) 139 | S_ax.invert_yaxis() 140 | 141 | central_ticks = [0, 8, 40, 72, 104, 136, 168, 200, 232] 142 | 143 | VH_ax.set_xticklabels(central_ticks) 144 | VH_ax.set_yticklabels(central_ticks) 145 | U_ax.set_xticklabels(central_ticks) 146 | U_ax.set_yticklabels(central_ticks) 147 | S_ax.set_yticks(central_ticks) 148 | 149 | VH_ax.set_title(r'$V_2^\dagger$', fontsize=20) 150 | VH_ax.set_xlabel('Layer Depth (l)', fontsize=14) 151 | VH_ax.set_ylabel('Dimension (n)', fontsize=14) 152 | 153 | U_ax.set_title(r'$U_2$', fontsize=20) 154 | U_ax.set_xlabel('Layer Depth (l)', fontsize=14) 155 | 156 | S_ax.set_title(r'$\Sigma_2$', fontsize=20) 157 | S_ax.set_xlabel('Transmissivity', fontsize=14) 158 | 159 | plt.colorbar(VH_im, cax=cbar_ax) 160 | cbar_ax.yaxis.set_label_position('left') 161 | cbar_ax.set_ylabel('Accuracy Change', fontsize=14) 162 | 163 | if f_out: 164 | plt.savefig(f_out) 165 | def calculate_local_sensitivity(net, U, f_name=None, sig=1e-1, block_size=8): 166 | perf_acc = get_acc(net) 167 | #Generate noise 168 | theta_A = U.theta_A.clone() 169 | theta_B = U.theta_B.clone() 170 | mask_A = th.zeros_like(theta_A) 171 | mask_B = th.zeros_like(theta_B) 172 | noise_A = th.zeros_like(theta_A) 173 | noise_B = th.zeros_like(theta_B) 174 | noise_A.normal_() 175 | noise_B.normal_() 176 | 177 | N = U.D//2 178 | acc_mat = np.zeros((N//block_size, N//block_size)) 179 | for i in range(N//block_size): 180 | for j in range(N//block_size): 181 | print(i, j) 182 | idx = ( 183 | slice((i)*block_size,(i+1)*block_size), slice((j)*block_size,(j+1)*block_size) 184 | ) 185 | mask_A *= 0 186 | mask_B *= 0 187 | mask_A[idx] = 1 188 | mask_B[idx] = 1 189 | U.theta_A = Parameter(theta_A + sig * noise_A * mask_A) 190 | U.theta_B = Parameter(theta_B + sig * noise_B * mask_B) 191 | 192 | #theta = grid_phase_mat(net[2].VH) 193 | #plt.imshow(theta) 194 | #plt.show() 195 | acc = get_acc(net) - perf_acc 196 | acc_mat[i, j] = acc 197 | print(acc) 198 | 199 | if f_name: 200 | np.save(f_name, acc_mat) 201 | return acc_mat 202 | def make_plot(rand_S=True, s=1, layer_n=LAYER): 203 | net = load_grid(rand_S=rand_S) 204 | theta = net[layer_n].S.theta 205 | trans = (th.sin(theta/2)**2).cpu().data.numpy() 206 | if rand_S: 207 | U_name = F_LN_U_RAND 208 | V_name = F_LN_V_RAND 209 | f_out = F_FIG_LN_RAND 210 | else: 211 | U_name = F_LN_U_ORD 212 | V_name = F_LN_V_ORD 213 | f_out = F_FIG_LN_ORD 214 | 215 | U_mat = np.load(U_name) 216 | V_mat = np.load(V_name) 217 | print(f_out) 218 | plot_local_sensitivity(U_mat, V_mat, trans, f_out, color_scale=s) 219 | def get_mats(rand_S=True, layer_n=LAYER): 220 | net = load_grid(rand_S=rand_S) 221 | layer = net[layer_n] 222 | VH = layer.VH 223 | U = layer.U 224 | if rand_S: 225 | U_name = F_LN_U_RAND 226 | V_name = F_LN_V_RAND 227 | else: 228 | U_name = F_LN_U_ORD 229 | V_name = F_LN_V_ORD 230 | calculate_local_sensitivity(net, U, U_name, sig=1e-1) 231 | calculate_local_sensitivity(net, VH, V_name, sig=1e-1) 232 | 233 | def calculate_local_sensitivity_fft(net, U, f_name=None, sig=1e-1, block_size=8): 234 | perf_acc = get_acc(net) 235 | 236 | #Generate noise 237 | theta = U.theta.clone() 238 | mask = th.zeros_like(theta) 239 | noise = th.zeros_like(theta) 240 | noise.normal_() 241 | 242 | N, M = theta.shape 243 | acc_mat = np.zeros((N//block_size, M//block_size)) 244 | for i in range(N//block_size): 245 | for j in range(M//block_size): 246 | print(i, j) 247 | idx = ( 248 | slice((i)*block_size,(i+1)*block_size), slice((j)*block_size,(j+1)*block_size) 249 | ) 250 | mask *= 0 251 | mask[idx] = 1 252 | U.theta = Parameter(theta + sig * noise * mask) 253 | acc = get_acc(net) - perf_acc 254 | acc_mat[i, j] = acc 255 | print(acc) 256 | if f_name: 257 | np.save(f_name, acc_mat) 258 | return acc_mat 259 | def get_mats_fft(layer_n=LAYER): 260 | net = load_fft() 261 | layer = net[layer_n] 262 | U = layer.U 263 | VH = layer.VH 264 | U_name = F_LN_U_FFT 265 | V_name = F_LN_V_FFT 266 | calculate_local_sensitivity_fft(net, U, U_name, sig=1e-1) 267 | calculate_local_sensitivity_fft(net, VH, V_name, sig=1e-1) 268 | def make_plot_fft(s=1, layer_n=LAYER): 269 | net = load_fft() 270 | theta = net[layer_n].S.theta 271 | trans = (th.sin(theta/2)**2).cpu().data.numpy() 272 | U_name = F_LN_U_FFT 273 | V_name = F_LN_V_FFT 274 | f_out = F_FIG_LN_FFT 275 | 276 | U_mat = np.load(U_name) 277 | V_mat = np.load(V_name) 278 | plot_local_sensitivity(U_mat, V_mat, trans, f_out, color_scale=s) 279 | if __name__ == '__main__': 280 | net = load_fft() 281 | layer = net[LAYER] 282 | U = layer.U 283 | print(U) 284 | 285 | make_plot_fft() 286 | 287 | assert False 288 | get_mats_fft() 289 | 290 | get_mats(True) 291 | get_mats(False) 292 | make_plot(True) 293 | make_plot(False, 0.4) 294 | -------------------------------------------------------------------------------- /mnist.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch as th 3 | import torch.nn as nn 4 | import torch.optim as optim 5 | from time import time 6 | from torchvision import datasets, transforms 7 | import matplotlib.pylab as plt 8 | from optical_nn import * 9 | import complex_torch_var as ct 10 | from time import time 11 | import os 12 | from default_params import * 13 | 14 | 15 | TEST_SIZE = 10_000 16 | BATCH_SIZE = 100 17 | USE_CUDA = th.cuda.is_available() 18 | if USE_CUDA: 19 | print('Using CUDA') 20 | DEVICE = th.device('cuda') 21 | else: 22 | print('Using CPU') 23 | DEVICE = th.device('cpu') 24 | 25 | # Define loader 26 | def mnist_loader(train=True, batch_size=BATCH_SIZE, shuffle=True): 27 | loader = th.utils.data.DataLoader( 28 | datasets.MNIST('../data', train=train, download=True, 29 | transform=transforms.Compose([ 30 | transforms.ToTensor(), 31 | transforms.Normalize((0.1307,), (0.3081,)) 32 | ])), 33 | batch_size=batch_size, shuffle=shuffle) 34 | return loader 35 | 36 | # Get X for testing 37 | for data, target in mnist_loader(train=False, batch_size=100, shuffle=False): 38 | continue 39 | data = data.view(-1, 28**2) 40 | data, target = data.to(DEVICE), target.to(DEVICE) 41 | X0 = data[82][None, :] 42 | 43 | 44 | # Network dims 45 | N_IN = 28**2//2 46 | 47 | 48 | def mnist_ONN(unitary=Unitary, num_in=N_IN, num_out=10, num_h1=256, num_h2=256, hidden_units=[256, 256], device=DEVICE, sigma_PS=0, sigma_BS=0, T0=0.03): 49 | """ 50 | Creates a MLP for training on MNIST 51 | args: 52 | unitary: The type of unitary layer used (GridUnitary, FFTUnitary, etc.) 53 | num_h1: The number of hidden units in the first layer 54 | num_h2: The number of hidden units in the second layer 55 | device: The device to be used by torch. 'cpu' or 'cuda' 56 | sigma_PS: The stdev on uncertainty added to phaseshifter 57 | sigma_BS: The stdev on uncertainty added to beamsplitter 58 | returns: 59 | A th.nn.Sequential module with above features 60 | """ 61 | f = ShiftedSoftplus(T=T0) 62 | layers = [ 63 | Linear(num_in, hidden_units[0], sigma_PS=sigma_PS, sigma_BS=sigma_BS, UNet=unitary), 64 | ModNonlinearity(f=f) 65 | ] 66 | for nh_, nh in zip(hidden_units[:-1], hidden_units[1:]): 67 | layers.extend([ 68 | Linear(nh_, nh, sigma_PS=sigma_PS, sigma_BS=sigma_BS, UNet=unitary), 69 | ModNonlinearity(f=f), 70 | ]) 71 | layers.extend([ 72 | Linear(hidden_units[-1], num_out, sigma_PS=sigma_PS, sigma_BS=sigma_BS, UNet=unitary), 73 | ComplexNorm(), 74 | nn.LogSoftmax(dim=1) 75 | ]) 76 | net = NoisySequential(*layers).to(device) 77 | return net 78 | def mnist_complex(num_in=N_IN, num_out=10, hidden_units=[256, 256], device=DEVICE, sigma=0, T0=0.03): 79 | f = ShiftedSoftplus(T=T0) 80 | layers = [ 81 | ComplexLinear(num_in, hidden_units[0]), 82 | ModNonlinearity(f=f) 83 | ] 84 | for nh_, nh in zip(hidden_units[:-1], hidden_units[1:]): 85 | layers.extend([ 86 | ComplexLinear(nh_, nh), 87 | ModNonlinearity(f=f), 88 | ]) 89 | layers.extend([ 90 | ComplexLinear(hidden_units[-1], num_out), 91 | ComplexNorm(), 92 | nn.LogSoftmax(dim=1) 93 | ]) 94 | net = nn.Sequential(*layers).to(device) 95 | def to_grid_net(rand_S=True): 96 | grid_net = mnist_ONN(num_in=num_in, num_out=num_out, hidden_units=hidden_units) 97 | for lc, lo in zip(net, grid_net): 98 | if isinstance(lc, ComplexLinear): 99 | assert isinstance(lo, Linear) 100 | assert lc.weight.shape == (2*lo.D_out, 2*lo.D_in) 101 | M = lc.weight.to('cpu').data 102 | t0 = time() 103 | print(f'Converting weights of size {M.shape}') 104 | lo.emul_M(M, rand_S=rand_S) 105 | print(time()-t0) 106 | return grid_net 107 | net.to_grid_net = to_grid_net 108 | return net 109 | def train(model, n_epochs, log_interval, optim_params, batch_size=100, criterion=nn.NLLLoss(), device=DEVICE, epoch_callback=None, log_callback=None): 110 | loader = mnist_loader(train=True, batch_size=batch_size) 111 | optimizer = optim.SGD(model.parameters(), **optim_params) 112 | #criterion = nn.NLLLoss() 113 | 114 | t0 = time() 115 | for epoch in range(n_epochs): 116 | for batch_idx, (data, target) in enumerate(loader): 117 | data = data.view(-1, 28**2).to(device) 118 | target = target.to(device) 119 | out = model(data) 120 | optimizer.zero_grad() 121 | loss = criterion(out, target) 122 | loss.backward() 123 | optimizer.step() 124 | if batch_idx % log_interval == 0 and batch_idx != 0: 125 | t = time() - t0 126 | t0 = time() 127 | acc = (out.argmax(1) == target).float().mean() 128 | #acc = get_acc(model, device) 129 | out = model(data) 130 | print(f'Epoch: {epoch}, Train loss: {loss.float():.4f}, Train acc: {acc:.4f}, Time/it: {t/log_interval * 1e3:.4f} ms') 131 | if log_callback: 132 | log_callback(model, epoch) 133 | if epoch_callback: 134 | epoch_callback(model, epoch) 135 | def get_acc(model, device=DEVICE): 136 | with th.no_grad(): 137 | for data, target in mnist_loader(train=False, batch_size=TEST_SIZE): 138 | data = data.view(-1, 28**2) 139 | data, target = data.to(device), target.to(device) 140 | out = model(data) 141 | pred = out.argmax(1) 142 | acc = (pred == target).float().mean() 143 | 144 | return acc.item() 145 | 146 | class StackedFFTUnitary(nn.Sequential): 147 | def __init__(self, D, n_stack=None, sigma_PS=0, sigma_BS=0): 148 | if n_stack is None: 149 | P = int(log2(D)) 150 | n_stack = int(D//P) 151 | layers = [FFTUnitary(D, sigma_PS=sigma_PS, sigma_BS=sigma_BS) for _ in range(n_stack)] 152 | 153 | super().__init__(*layers) 154 | self.sigma_PS = sigma_PS 155 | self.sigma_BS = sigma_BS 156 | @property 157 | def sigma_PS(self): 158 | return self._sigma_PS 159 | @property 160 | def sigma_BS(self): 161 | return self._sigma_BS 162 | @sigma_PS.setter 163 | def sigma_PS(self, new_sig): 164 | # Updates sigma of all layers 165 | for layer in self: 166 | layer.sigma_PS = new_sig 167 | self._sigma_PS = new_sig 168 | @sigma_BS.setter 169 | def sigma_BS(self, new_sig): 170 | # Updates sigma of all layers 171 | for layer in self: 172 | layer.sigma_BS = new_sig 173 | self._sigma_BS = new_sig 174 | 175 | def mnist_stacked_fft(n_stack, device=DEVICE, T0=0.03): 176 | f = ShiftedSoftplus(T=0.03) 177 | layers = [ 178 | Linear(N_IN, 256), 179 | ModNonlinearity(f=f), 180 | StackedFFTUnitary(256, n_stack=n_stack), 181 | Diagonal(256, 256), 182 | StackedFFTUnitary(256, n_stack=n_stack), 183 | ModNonlinearity(f=f), 184 | Linear(256, 10), 185 | ComplexNorm(), 186 | nn.LogSoftmax(dim=1) 187 | ] 188 | net = NoisySequential(*layers).to(device) 189 | return net 190 | 191 | def mnist_grid_truncated(num_in=N_IN, num_out=10, hidden_units=[256, 256], device=DEVICE, sigma_PS=0, sigma_BS=0, T0=0.03): 192 | f = ShiftedSoftplus(T=T0) 193 | layers = [ 194 | TruncatedGridLinear(num_in, hidden_units[0], sigma_PS=sigma_PS, sigma_BS=sigma_BS), 195 | ModNonlinearity(f=f) 196 | ] 197 | for nh_, nh in zip(hidden_units[:-1], hidden_units[1:]): 198 | layers.extend([ 199 | TruncatedGridLinear(nh_, nh, sigma_PS=sigma_PS, sigma_BS=sigma_BS), 200 | ModNonlinearity(f=f), 201 | ]) 202 | layers.extend([ 203 | TruncatedGridLinear(hidden_units[-1], num_out, sigma_PS=sigma_PS, sigma_BS=sigma_BS), 204 | ComplexNorm(), 205 | nn.LogSoftmax(dim=1) 206 | ]) 207 | net = NoisySequential(*layers).to(device) 208 | return net 209 | 210 | 211 | if __name__ == '__main__': 212 | 213 | assert False 214 | LR_FFT = 5e-2 215 | LR_GRID = 2.5e-4 216 | LR_COMPLEX = 5e-3 217 | train_params = {} 218 | train_params['n_epochs'] = 5 219 | train_params['log_interval'] = 100 220 | train_params['batch_size'] = 100 221 | 222 | optim_params = {} 223 | optim_params['lr'] = LR_FFT 224 | optim_params['momentum'] = .9 225 | 226 | net = mnist_grid_truncated() 227 | print(net(X0)) 228 | train(net, **train_params, optim_params=optim_params) 229 | optim_params['lr'] /= 5 230 | train(net, **train_params, optim_params=optim_params) 231 | th.save(net, os.path.join(DIR_TRAINED_MODELS, 'truncated_grid.pth')) 232 | 233 | 234 | assert False 235 | net = mnist_stacked_fft(1) 236 | train(net, **train_params, optim_params=optim_params) 237 | optim_params['lr'] /= 5 238 | train(net, **train_params, optim_params=optim_params) 239 | th.save(net, os.path.join(DIR_TRAINED_MODELS, 'stacked_fft_1.pth')) 240 | print(get_acc(net)) 241 | net_loaded = th.load(os.path.join(DIR_TRAINED_MODELS, 'stacked_fft_1.pth')) 242 | print(get_acc(net_loaded)) 243 | 244 | assert False 245 | train(net, **train_params, optim_params=optim_params) 246 | optim_params['lr'] /= 5 247 | train(net, **train_params, optim_params=optim_params) 248 | th.save(net, os.path.join(DIR_TRAINED_MODELS, 'stacked_fft_32.pth')) 249 | print(get_acc(net)) 250 | net_loaded = th.load(os.path.join(DIR_TRAINED_MODELS, 'stacked_fft_32.pth')) 251 | print(get_acc(net_loaded)) 252 | -------------------------------------------------------------------------------- /noisy_fidelity.py: -------------------------------------------------------------------------------- 1 | from default_params import * 2 | import torch as th 3 | import numpy as np 4 | from optical_nn import * 5 | 6 | def get_fidelity(U0, U, normalize=False): 7 | D, _ = U.shape 8 | num = np.trace(U.H @ U0) 9 | if normalize: 10 | den = (D * np.trace(U.H @ U))**0.5 11 | else: 12 | den = D 13 | return np.abs(num / den)**2 14 | def noisy_f_test_diag(net, f_out, sigma_list=NOISY_TEST_SIGMAS, n_trials=NOISY_TEST_TRIALS): 15 | U0 = net.get_U() 16 | fidelity = np.zeros((n_trials, len(sigma_list))) 17 | for i, sig in enumerate(sigma_list): 18 | net.sigma_BS = sig 19 | net.sigma_PS = sig 20 | print(f'sigma : {sig}') 21 | for n in range(n_trials): 22 | U = net.get_U() 23 | F = get_fidelity(U0, U) 24 | print(f'Trial {n}, Fidelity {F:.4f}') 25 | fidelity[n, i] = F 26 | print(fidelity[:, i].mean()) 27 | print(np.std(fidelity[:, i])) 28 | 29 | np.save(f_out, fidelity) 30 | return fidelity 31 | 32 | if __name__ == '__main__': 33 | 34 | D = 256 35 | grid_unitary = th.load(os.path.join(DIR_TRAINED_MODELS, 'GridUnitary_256.pth')) 36 | grid_unitary.approx_sigma_bs=False 37 | stacked_net = StackedFFTUnitary(256, n_stack=32) 38 | fft_net = FFTUnitary(256) 39 | trunc_net = Unitary.truncated(8)(256) 40 | 41 | f_unit = os.path.join(DIR_RESULTS, 'unitary_fidelity.npy') 42 | f_stack = os.path.join(DIR_RESULTS, 'stacked_fft_32_fid.npy') 43 | f_fft = os.path.join(DIR_RESULTS, 'fft_fid.npy') 44 | f_trunc = os.path.join(DIR_RESULTS, 'trunc_fid.npy') 45 | #noisy_f_test_diag(stacked_net, f_stack, n_trials=20) 46 | noisy_f_test_diag(trunc_net, f_trunc, n_trials=20) 47 | noisy_f_test_diag(fft_net, f_fft, n_trials=20) 48 | -------------------------------------------------------------------------------- /noisy_prob.py: -------------------------------------------------------------------------------- 1 | import matplotlib 2 | matplotlib.use("TkAgg") 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | import torch as th 6 | import torch.nn as nn 7 | import torch.optim as optim 8 | from time import time 9 | from torchvision import datasets, transforms 10 | import complex_torch_var as ct 11 | from time import time 12 | from mnist import * 13 | from default_params import * 14 | from train_mnist import * 15 | 16 | def noisy_prob_test(net, n_trials, sigma_BS, sigma_PS, f_name): 17 | noisy_prob = np.zeros((n_trials, 10)) 18 | net.set_noise(sigma_PS, sigma_BS) 19 | 20 | for n in range(n_trials): 21 | prob = (th.exp(net(X0))).data.cpu().numpy()[0] 22 | noisy_prob[n] = prob 23 | 24 | if f_name: 25 | np.save(f_name, noisy_prob) 26 | 27 | return noisy_prob 28 | def noisy_prob_plot(noisy_prob, title=None, f_name=None): 29 | print(f_name) 30 | print(noisy_prob) 31 | 32 | med_prob = np.quantile(noisy_prob, 0.5, axis=0) 33 | low_prob = np.quantile(noisy_prob, 0.2, axis=0) 34 | high_prob = np.quantile(noisy_prob, 0.8, axis=0) 35 | mean_prob = noisy_prob.mean(axis=0) 36 | no_err = np.all(low_prob == high_prob) 37 | 38 | err_h = high_prob - med_prob 39 | err_l = med_prob - low_prob 40 | 41 | plt.xticks(np.arange(10), fontsize=14) 42 | plt.yticks(fontsize=14) 43 | plt.ylim([0,1]) 44 | print(med_prob) 45 | 46 | if title: 47 | plt.title(title, fontsize=18) 48 | 49 | if no_err: 50 | plt.bar(list(range(10)), mean_prob, color='white', edgecolor='black') 51 | else: 52 | plt.bar(list(range(10)), mean_prob, yerr=[err_l, err_h], capsize=5, color='white', edgecolor='black') 53 | plt.ylabel('Predication Probability', fontsize=16) 54 | plt.xlabel('Class', fontsize=16) 55 | 56 | if f_name: 57 | plt.savefig(f_name) 58 | 59 | if __name__ == '__main__': 60 | 61 | USE_FFT = True 62 | if USE_FFT: 63 | net = load_fft() 64 | arch_name = 'FFT' 65 | else: 66 | net = load_grid() 67 | arch_name = 'grid' 68 | n_trials = 20 69 | sigma_PS = 0.01 * 1 70 | sigma_BS = 0.01 * 1 71 | dir = DIR_FIGS 72 | for sigma_PS, sigma_BS, suff in [(0, 0, ''), (0.01, 0.01, '_PS_BS')]: 73 | prob = noisy_prob_test(net, n_trials, sigma_BS * 2, sigma_PS, f_name=None) 74 | if suff == '': 75 | title = f'Output with Ideal Components ({arch_name})' 76 | elif suff == '_PS_BS': 77 | title = f'Output with Imprecise Components ({arch_name})' 78 | f_out = os.path.join(DIR_FIGS, f'{arch_name}{suff}.pdf') 79 | noisy_prob_plot(prob, title=title, f_name=f_out) 80 | plt.cla() 81 | plt.clf() 82 | plt.close() 83 | -------------------------------------------------------------------------------- /noisy_test.py: -------------------------------------------------------------------------------- 1 | import matplotlib 2 | matplotlib.use("TkAgg") 3 | import matplotlib.pylab as plt 4 | import numpy as np 5 | import torch as th 6 | import torch.nn as nn 7 | import torch.optim as optim 8 | from time import time 9 | from torchvision import datasets, transforms 10 | import complex_torch_var as ct 11 | from time import time 12 | from scipy.ndimage import uniform_filter 13 | from mnist import * 14 | from train_mnist import * 15 | from default_params import * 16 | import os 17 | 18 | 19 | def noisy_test_diag(net, f_out, sigma_list=NOISY_TEST_SIGMAS, n_trials=NOISY_TEST_TRIALS): 20 | accuracies = np.zeros((n_trials, len(sigma_list))) 21 | for i, sig in enumerate(sigma_list): 22 | net.set_noise(sig, sig) 23 | print(f'sigma : {sig}') 24 | for n in range(n_trials): 25 | acc = get_acc(net) 26 | print(f'Trial {n}, Accuracy {acc:.4f}') 27 | accuracies[n, i] = acc 28 | print(accuracies[:, i].mean()) 29 | print(np.std(accuracies[:, i])) 30 | 31 | np.save(f_out, accuracies) 32 | return accuracies 33 | def noisy_test_bs_ps(net, f_out, sigma_list=NOISY_TEST_SIGMAS, n_trials=NOISY_TEST_TRIALS): 34 | accuracies = np.zeros((n_trials, len(sigma_list), len(sigma_list))) 35 | for i, sig_p in enumerate(sigma_list): 36 | for j, sig_b in enumerate(sigma_list): 37 | net.set_noise(sig_p, sig_b) 38 | print(f'sigma : {sig_p, sig_b}') 39 | for n in range(n_trials): 40 | acc = get_acc(net) 41 | accuracies[n, i, j] = acc 42 | print(f'Trial {n}, Accuracy {acc:.4f}') 43 | mean = accuracies[:, i, j].mean() 44 | std = np.std(accuracies[:, i, j]) 45 | print(f'Accuracy mean {mean:.4f}, stdev {std:.4f}') 46 | 47 | np.save(f_out, accuracies) 48 | return accuracies 49 | 50 | def extract_diag_fft(): 51 | acc_psbs = np.load(F_FFT_ACC_PSBS) 52 | acc_diag = np.array([np.diag(x) for x in acc_psbs]) 53 | np.save(F_FFT_ACC_DIAG, acc_diag) 54 | def extract_diag_grid(): 55 | acc_psbs = np.load(F_GRID_ACC_PSBS) 56 | acc_diag = np.array([np.diag(x) for x in acc_psbs]) 57 | np.save(F_GRID_ACC_DIAG, acc_diag) 58 | 59 | def test_grid(): 60 | grid_net = load_grid() 61 | noisy_test_diag(grid_net, f_out=F_GRID_ACC_DIAG) 62 | def test_grid_ordered_sv(): 63 | grid_net = load_grid(rand_S=False) 64 | noisy_test_diag(grid_net, f_out=F_GRID_ORD_ACC_DIAG) 65 | def test_fft(diag=True): 66 | fft_net = load_fft() 67 | if diag: 68 | noisy_test_diag(fft_net, f_out=F_FFT_ACC_DIAG) 69 | else: 70 | noisy_test_bs_ps(fft_net, f_out=F_FFT_ACC_PSBS) 71 | if __name__ == '__main__': 72 | 73 | assert False 74 | #fft_net = load_fft().to(DEVICE) 75 | #grid_net = load_grid().to(DEVICE) 76 | net = th.load(os.path.join(DIR_TRAINED_MODELS, 'stacked_fft_1.pth')) 77 | print(get_acc(net)) 78 | noisy_test_diag(net, f_out=os.path.join(DIR_NOISY_TEST, 'stacked_fft_1.npy')) 79 | 80 | assert False 81 | net = th.load(os.path.join(DIR_TRAINED_MODELS, 'stacked_fft_32.pth')) 82 | print(get_acc(net)) 83 | noisy_test_diag(net, f_out=os.path.join(DIR_NOISY_TEST, 'stacked_fft_diag.npy')) 84 | 85 | assert False 86 | cgrd_net = mnist_ONN(unitary=CGRDUnitary) 87 | cgrd_net.load_state_dict( th.load(os.path.join(DIR_TRAINED_MODELS, 'cgrd.pth'))) 88 | noisy_test_diag(cgrd_net, f_out=os.path.join(DIR_NOISY_TEST, 'CGRD_diag.npy')) 89 | 90 | 91 | 92 | #noisy_test_bs_ps(fft_net, f_out=F_FFT_ACC_PSBS) 93 | #noisy_test_bs_ps(grid_net, f_out=F_GRID_ACC_PSBS) 94 | #test_grid_ordered_sv() 95 | -------------------------------------------------------------------------------- /optical_nn.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch as th 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | from torch.nn.parameter import Parameter 6 | import complex_torch_var as ct 7 | from numpy.linalg import svd as np_svd 8 | from time import time 9 | import matplotlib.pylab as plt 10 | from unitary_decomp import unitary_decomp 11 | from math import log2, ceil, pi, log, exp 12 | from functools import partial 13 | 14 | def svd(M, rand_S=True): 15 | """ 16 | Performs SVD decomposition. optionally, randomly permutes the singular values. 17 | Args: 18 | M: A numpy array to be decomposed 19 | rand_S: If True, randomly permutes the singular values. 20 | 21 | Returns: 22 | U, VH: Unitary np.arrays 23 | S: 1-d np.array of singular values 24 | """ 25 | 26 | U, S, VH = np_svd(M) 27 | if not rand_S: 28 | return U, S, VH 29 | else: 30 | Di = VH.shape[0] 31 | Do = U.shape[0] 32 | if Di > Do: 33 | perm = np.random.permutation(Do) 34 | perm_ = np.arange(Di) 35 | perm_[:Do] = perm 36 | return U[:, perm], S[perm], VH[perm_, :] 37 | else: 38 | perm = np.random.permutation(Di) 39 | perm_ = np.arange(Do) 40 | perm_[:Di] = perm 41 | return U[:, perm_], S[perm], VH[perm, :] 42 | def fft_idx(p, j): 43 | """ 44 | Gives the indices for efficient implementation of FFTUnitary 45 | Args: 46 | p: The dimension of FFT layer is 2**p 47 | j: The layer for which indices are to be returned 48 | 49 | Returns: 50 | idx_uv : The inverse indices to be mixed. Used in defining UV_FFT 51 | idx_x : The indices used to permute X 52 | 53 | Example: 54 | fft_idx(3, 2) = 55 | (tensor([0, 2, 4, 6, 1, 3, 5, 7]), tensor([4, 5, 6, 7, 0, 1, 2, 3])) 56 | 57 | The channels being mixed are (0, 4), (1, 5), (2, 6), and (3, 7) 58 | 59 | UV are constructed as 60 | 61 | UV <- UV[0, 2, 4, 6, 1, 3, 5, 7] 62 | or 63 | UV[0, 4, 1, 5, 2, 6, 3, 7] <- UV 64 | 65 | Pairs of channels mixed as desired 66 | 67 | The original input : 0 1 2 3 4 5 6 7 68 | Permuted input : 4 5 6 7 0 1 2 3 69 | 70 | Again, the channels mixed are as expected 71 | """ 72 | 73 | assert j < p 74 | perm = th.arange(2**p) 75 | _mask = 2**(j+1) - 1 76 | __mask = 2**j - 1 77 | mask_ = ~_mask 78 | perm_ = perm & mask_ 79 | _perm = perm & _mask 80 | __perm = perm & __mask 81 | idx_uv = (__perm << 1) + (_perm >> j) + perm_ 82 | idx_x = perm ^ 2**(j) 83 | 84 | return idx_uv, idx_x 85 | def get_UV(theta, phi, dr=0, dr_=0): 86 | r = 2**(-0.5) + dr 87 | r_ = 2**(-0.5) + dr_ 88 | 89 | t = (1 - r**2)**0.5 90 | t_ = (1 - r_**2)**0.5 91 | 92 | s_phi, c_phi = th.sin(phi), th.cos(phi) 93 | s_theta, c_theta = th.sin(theta), th.cos(theta) 94 | s_sum, c_sum = th.sin(theta + phi), th.cos(theta + phi) 95 | 96 | u_re = (r*r_*c_sum - t*t_*c_phi, r*r_ - t*t_*c_theta) 97 | v_re = (-r*t_*s_theta, -r_*t*s_sum - r*t_*s_phi) 98 | u_im = (r*r_*s_sum - t*t_*s_phi, -t*t_*s_theta) 99 | v_im = (t*r_ + r*t_*c_theta, t*r_*c_sum + r*t_*c_phi) 100 | 101 | return u_re, v_re, u_im, v_im 102 | def UV_MZ(D, theta, phi, stage, BS_noise=[1, 1], new=False): 103 | 104 | n_layers = theta.shape[0] 105 | assert phi.shape[0] == n_layers 106 | 107 | if stage == 'A': 108 | idx_1 = slice(None, -1, 2) 109 | idx_2 = slice(1, None, 2) 110 | elif stage == 'B': 111 | idx_1 = slice(1, -1, 2) 112 | idx_2 = slice(2, None, 2) 113 | else: 114 | raise Exception('Incorrect stage name (A or B)') 115 | 116 | s1 = th.sin(theta/2) 117 | c1 = th.cos(theta/2) 118 | s2 = th.sin(theta/2 + phi) 119 | c2 = th.cos(theta/2 + phi) 120 | 121 | device = th.device('cuda' if s1.is_cuda else 'cpu') 122 | u_re = th.ones(n_layers, D).to(device) 123 | v_re = th.zeros(n_layers, D).to(device) 124 | u_im = th.zeros(n_layers, D).to(device) 125 | v_im = th.zeros(n_layers, D).to(device) 126 | 127 | noise_U, noise_V = BS_noise 128 | 129 | if new: 130 | ur, vr, ui, vi = get_UV(theta, phi, BS_noise[0], BS_noise[1]) 131 | u_re[:, idx_1], u_re[:, idx_2] = ur 132 | v_re[:, idx_1], v_re[:, idx_2] = vr 133 | u_im[:, idx_1], u_im[:, idx_2] = ui 134 | v_im[:, idx_1], v_im[:, idx_2] = vi 135 | else: 136 | u_re[:, idx_1] = -s1*s2 * noise_U 137 | u_re[:, idx_2] = s1**2 * noise_U 138 | 139 | v_re[:, idx_1] = -c1*s1 * noise_V 140 | v_re[:, idx_2] = -c1*s2 * noise_V 141 | 142 | u_im[:, idx_1] = s1*c2 * noise_U 143 | u_im[:, idx_2] = -c1*s1 * noise_U 144 | 145 | v_im[:, idx_1] = c1**2 * noise_V 146 | v_im[:, idx_2] = c1*c2 * noise_V 147 | 148 | return u_re, v_re, u_im, v_im 149 | def UV_FFT(theta, phi, BS_noise=[1, 1], new=False): 150 | """ 151 | theta -- a (P, D//2) tensor : The internal phaseshifts 152 | phi -- (P, D//2) tensor : The external phaseshifts 153 | """ 154 | 155 | n_layers = theta.shape[0] 156 | P, D2 = phi.shape 157 | assert P, D2 == theta.shape 158 | D = D2 * 2 159 | assert D == 2**P 160 | 161 | # Calculate the sin's and cos's 162 | s1 = th.sin(theta/2) 163 | c1 = th.cos(theta/2) 164 | s2 = th.sin(theta/2 + phi) 165 | c2 = th.cos(theta/2 + phi) 166 | 167 | # Initialize UV 168 | device = th.device('cuda' if s1.is_cuda else 'cpu') 169 | u_re = th.zeros(n_layers, D).to(device) 170 | v_re = th.zeros(n_layers, D).to(device) 171 | u_im = th.zeros(n_layers, D).to(device) 172 | v_im = th.zeros(n_layers, D).to(device) 173 | 174 | uv = [u_re, v_re, u_im, v_im] 175 | 176 | # Concat uv so that they're like u1u1 u2u2, ... 177 | idx_1 = slice(None, None, 2) #::2 178 | idx_2 = slice(1, None, 2) #1::2 179 | 180 | noise_U, noise_V = BS_noise 181 | 182 | if new: 183 | ur, vr, ui, vi = get_UV(theta, phi, BS_noise[0], BS_noise[1]) 184 | u_re[:, idx_1], u_re[:, idx_2] = ur 185 | v_re[:, idx_1], v_re[:, idx_2] = vr 186 | u_im[:, idx_1], u_im[:, idx_2] = ui 187 | v_im[:, idx_1], v_im[:, idx_2] = vi 188 | else: 189 | u_re[:, idx_1] = -s1*s2 * noise_U 190 | u_re[:, idx_2] = s1**2 * noise_U 191 | v_re[:, idx_1] = -c1*s1 * noise_V 192 | v_re[:, idx_2] = -c1*s2 * noise_V 193 | u_im[:, idx_1] = s1*c2 * noise_U 194 | u_im[:, idx_2] = -c1*s1 * noise_U 195 | v_im[:, idx_1] = c1**2 * noise_V 196 | v_im[:, idx_2] = c1*c2 * noise_V 197 | 198 | 199 | # Put them in desired order 200 | for j in range(P): 201 | uv_idx, _ = fft_idx(P, j) 202 | u_re[j] = u_re[j, uv_idx] 203 | v_re[j] = v_re[j, uv_idx] 204 | u_im[j] = u_im[j, uv_idx] 205 | v_im[j] = v_im[j, uv_idx] 206 | 207 | return uv 208 | def perm_full(D, stage='A', complex='True'): 209 | perm = list(range(D)) 210 | if stage=='A': 211 | for i in range(D//2): 212 | perm[2*i], perm[2*i+1] = perm[2*i+1], perm[2*i] 213 | else: 214 | for i in range((D-1)//2): 215 | perm[2*i+1], perm[2*i+2] = perm[2*i+2], perm[2*i+1] 216 | 217 | return perm 218 | def layer_mult_full(X, UV, perm): 219 | """ 220 | Performs calculations equivalent to propgation through one layer of MZI. 221 | Args: 222 | X: (N, D)-th.Tensor representing the input with N being the batch size, D the dimension 223 | UV = [U_re, V_re, U_im, V_im]: The 1-D tensors containing the values of the transfer matrices of the MZI layer 224 | 225 | Returns: 226 | The output equivalent to U @ X where U would be the transfer matrix. 227 | """ 228 | 229 | N, D2 = X.shape 230 | assert D2 % 2 == 0 231 | D = D2//2 232 | 233 | U_real, V_real, U_imag, V_imag = UV 234 | X_re = X[:, :D] 235 | X_im = X[:, D:] 236 | 237 | X_re = X[:, :D] 238 | X_im = X[:, D:] 239 | sX_re = X_re[:, perm] 240 | sX_im = X_im[:, perm] 241 | 242 | Y_real = (U_real*X_re - U_imag*X_im) + (V_real*sX_re - V_imag*sX_im) 243 | Y_imag = (U_real*X_im + U_imag*X_re) + (V_real*sX_im + V_imag*sX_re) 244 | 245 | return th.cat((Y_real, Y_imag), 1) 246 | 247 | 248 | class FFTShaper(nn.Module): 249 | def __init__(self, D, dir, randomize=True): 250 | super().__init__() 251 | self.D = D 252 | self.D_pow_2 = 2**ceil(log2(D)) 253 | assert dir in ['in', 'out'] 254 | self.dir = dir 255 | 256 | perm = th.randperm(self.D_pow_2) 257 | self.idx = Parameter(perm[:D], requires_grad=False) 258 | #self.idx = th.arange(D) 259 | def forward(self, X): 260 | N, _ = X.shape 261 | device = th.device('cuda' if X.is_cuda else 'cpu') 262 | if self.dir == 'in': 263 | # Complex variables 264 | out = th.zeros(N, self.D_pow_2 * 2).to(device) 265 | out[:, self.idx] = X[:, :self.D] 266 | out[:, self.idx + self.D_pow_2] = X[:, self.D:] 267 | else: 268 | out = th.zeros(N, self.D * 2).to(device) 269 | out[:, :self.D] = X[:, self.idx] 270 | out[:, self.D:] = X[:, self.idx + self.D_pow_2] 271 | return out 272 | 273 | """ Unitary Modules """ 274 | class Unitary(nn.Module): 275 | """ 276 | Custom pytorch Module simulating an ONN unitary multiplier 277 | 278 | Attributes: 279 | D: The dimension of multiplier 280 | sigma_PS: The stdev of gaussian noise added to phaseshifter values 281 | sigma_BS: The stdev of gaussian noise added to beamsplitter transmission 282 | """ 283 | @classmethod 284 | def from_U(cls, U, numpy=False): 285 | """ 286 | U : a complex unitary numpy matrix 287 | returns a onn Unitary with weights set to emulate U 288 | """ 289 | if not numpy: 290 | D2 = U.shape[0] 291 | assert U.shape[1] == D2 292 | D = D2//2 293 | U_re = U[:D, :D].numpy() 294 | U_im = U[D:, :D].numpy() 295 | U = np.matrix(U_re + 1j*U_im) 296 | else: 297 | D = U.shape[0] 298 | assert U.shape[1] == D 299 | 300 | net = cls(D) 301 | for param, ang in zip(net.angles, unitary_decomp(U)): 302 | param.data = ang 303 | return net 304 | @classmethod 305 | def truncated(cls, n_layers): 306 | return partial(cls, n_layers=n_layers) 307 | def __init__(self, D, sigma_PS=0, sigma_BS=0, FFT=False, use_psi=True, n_layers=None, approx_sigma_bs=False): 308 | super().__init__() 309 | self.D = D 310 | if n_layers is None: 311 | self.n_layers = D 312 | else: 313 | self.n_layers = min(n_layers, D) 314 | self.use_psi = use_psi 315 | self.sigma_PS = sigma_PS 316 | self.sigma_BS = sigma_BS 317 | self.approx_sigma_bs = approx_sigma_bs 318 | self.init_params() 319 | def init_params(self): 320 | D = self.D 321 | n_layer_B = self.n_layers//2 322 | n_layer_A = self.n_layers - n_layer_B 323 | self.n_layers_A = n_layer_A 324 | self.n_layers_B = n_layer_B 325 | 326 | n_MZ_B = (D-1)//2 327 | n_MZ_A = D//2 328 | 329 | sin_A = th.rand(n_layer_A, n_MZ_A) 330 | sin_B = th.rand(n_layer_B, n_MZ_B) 331 | 332 | if False: 333 | Y_A = 2 * np.abs(np.arange(n_layer_A) * 2 - D/2) - 1 334 | Y_B = 2 * np.abs(np.arange(n_layer_B) * 2 - D/2) - 1 335 | X_A = 2 * np.abs(np.arange(n_MZ_A) * 2 - D/2) - 1 336 | X_B = 2 * np.abs(np.arange(n_MZ_B) * 2 - D/2) - 1 337 | 338 | XX_A, YY_A = np.meshgrid(X_A, Y_A) 339 | beta_A = D - np.maximum(XX_A, YY_A) 340 | 341 | alpha_A = np.ones_like(beta_A) 342 | sin_A = np.random.beta(alpha_A, beta_A) 343 | 344 | XX_B, YY_B = np.meshgrid(X_B, Y_B) 345 | beta_B = D - np.maximum(XX_B, YY_B) 346 | 347 | alpha_B = np.ones_like(beta_B) 348 | sin_B = np.random.beta(alpha_B, beta_B) 349 | 350 | sin_A = th.tensor(sin_A).float() 351 | sin_B = th.tensor(sin_B).float() 352 | 353 | self.phi_A = Parameter(th.rand(n_layer_A, n_MZ_A) * 1 * pi) 354 | self.phi_B = Parameter(th.rand(n_layer_B, n_MZ_B) * 1 * pi) 355 | self.theta_A = Parameter(th.asin(sin_A)) 356 | self.theta_B = Parameter(th.asin(sin_B)) 357 | 358 | # Phase shift at the end 359 | if self.use_psi: 360 | self.psi = Parameter(th.rand(D) * 2 * pi) 361 | else: 362 | self.psi = th.zeros(D) 363 | @property 364 | def angles(self): 365 | return [self.theta_A, self.phi_A, self.theta_B, self.phi_B, self.psi] 366 | def get_BS_noise(self): 367 | noise_UA = th.zeros_like(self.theta_A) 368 | noise_UB = th.zeros_like(self.theta_B) 369 | noise_VA = th.zeros_like(self.theta_A) 370 | noise_VB = th.zeros_like(self.theta_B) 371 | 372 | noise_UA.normal_() 373 | noise_UB.normal_() 374 | noise_VA.normal_() 375 | noise_VB.normal_() 376 | 377 | self.noise_A = [noise_UA, noise_VA] 378 | self.noise_B = [noise_UB, noise_VB] 379 | self.BS_noise_init = True 380 | def noisy_weights(self): 381 | """ 382 | Add guassian noise of stdev sigma to all the angles 383 | """ 384 | noisy_angles = [] 385 | for angle in self.angles[:-1]: 386 | device = th.device('cuda' if angle.is_cuda else 'cpu') 387 | noise = th.zeros_like(angle).to(device) 388 | noise.normal_() 389 | noisy_angles.append(angle + self.sigma_PS * noise) 390 | if self.use_psi: 391 | noise = th.zeros_like(self.psi) 392 | noise.normal_() 393 | noisy_angles.append(self.psi + self.sigma_PS * noise) 394 | else: 395 | noisy_angles.append(self.psi) 396 | 397 | return noisy_angles 398 | def get_UV(self): 399 | # If simulating PS noise 400 | if self.sigma_PS > 0: 401 | theta_A, phi_A, theta_B, phi_B, psi = self.noisy_weights() 402 | else: 403 | theta_A, phi_A, theta_B, phi_B, psi = self.angles 404 | 405 | # If simulating BS noise 406 | if self.sigma_BS > 0: 407 | self.get_BS_noise() 408 | 409 | # If approximating BS noise 410 | if self.approx_sigma_bs: 411 | UV_A = UV_MZ(self.D, theta_A, phi_A, 'A') 412 | UV_B = UV_MZ(self.D, theta_B, phi_B, 'B') 413 | d_UV_A = UV_MZ(self.D, theta_A + pi, phi_A + pi, 'A', BS_noise=self.noise_A) 414 | d_UV_B = UV_MZ(self.D, theta_B + pi, phi_B + pi, 'B', BS_noise=self.noise_B) 415 | 416 | UV_A = [UV + 2**0.5 * self.sigma_BS * dUV for (UV, dUV) in zip(UV_A, d_UV_A)] 417 | UV_B = [UV + 2**0.5 * self.sigma_BS * dUV for (UV, dUV) in zip(UV_B, d_UV_B)] 418 | else: 419 | UV_A = UV_MZ(self.D, theta_A, phi_A, 'A', BS_noise=[self.sigma_BS * x for x in self.noise_A], new=True) 420 | UV_B = UV_MZ(self.D, theta_B, phi_B, 'B', BS_noise=[self.sigma_BS * x for x in self.noise_B], new=True) 421 | else: 422 | UV_A = UV_MZ(self.D, theta_A, phi_A, 'A') 423 | UV_B = UV_MZ(self.D, theta_B, phi_B, 'B') 424 | 425 | return UV_A, UV_B, psi 426 | def forward(self, X): 427 | UV_A, UV_B, psi = self.get_UV() 428 | perm_A = perm_full(self.D, 'A') 429 | perm_B = perm_full(self.D, 'B') 430 | 431 | # Iternate over the layers 432 | num_layers_total = UV_A[0].shape[0] + UV_B[0].shape[0] 433 | #for n in range(self.D): 434 | for n in range(num_layers_total): 435 | if n % 2 == 0: 436 | uv = [w[n//2] for w in UV_A] 437 | perm = perm_A 438 | else: 439 | uv = [w[(n-1)//2] for w in UV_B] 440 | perm = perm_B 441 | X = layer_mult_full(X, uv, perm) 442 | 443 | # Add final phase shift 444 | if self.use_psi: 445 | X_re = X[:, :self.D] 446 | X_im = X[:, self.D:] 447 | U_real = th.cos(psi) 448 | U_imag = th.sin(psi) 449 | Y_real = (U_real*X_re - U_imag*X_im) 450 | Y_imag = (U_real*X_im + U_imag*X_re) 451 | 452 | X = th.cat((Y_real, Y_imag), 1) 453 | 454 | return X 455 | def emul_U(self, U, numpy=False): 456 | if not numpy: 457 | D2 = U.shape[0] 458 | assert U.shape[1] == D2 459 | D = D2//2 460 | U_re = U[:D, :D].numpy() 461 | U_im = U[D:, :D].numpy() 462 | U = np.matrix(U_re + 1j*U_im) 463 | else: 464 | D = U.shape[0] 465 | assert U.shape[1] == D 466 | 467 | assert D == self.D 468 | 469 | for param, ang in zip(self.angles, unitary_decomp(U)): 470 | param.data = ang 471 | def get_U(self, numpy=True): 472 | U = self(th.eye(self.D * 2)).data.t() 473 | U_re = U[:self.D, :self.D] 474 | U_im = U[self.D:, :self.D] 475 | if numpy: 476 | return np.matrix(U_re) + 1j * np.matrix(U_im) 477 | else: 478 | return U 479 | 480 | class FFTUnitary(nn.Module): 481 | def __init__(self, D, sigma_PS=0, sigma_BS=0, use_psi=True, approx_sigma_bs=False): 482 | assert D & (D - 1) == 0 # Check if power of 2 483 | super().__init__() 484 | self.D = D 485 | self.P = int(log2(D)) 486 | 487 | self.BS_noise_init = False 488 | 489 | self.use_psi = use_psi 490 | self.sigma_PS = sigma_PS 491 | self.sigma_BS = sigma_BS 492 | self.approx_sigma_bs = approx_sigma_bs 493 | self.init_params() 494 | def init_params(self): 495 | D = self.D 496 | P = self.P 497 | sin_theta = th.rand(P, D//2) 498 | self.theta = Parameter(th.asin(sin_theta)) 499 | self.phi = Parameter(th.rand(P, D//2) * 2 * pi) 500 | 501 | # Phase shift at the end 502 | if self.use_psi: 503 | self.psi = Parameter(th.rand(D) * 2 * pi) 504 | else: 505 | self.psi = th.zeros(D) 506 | 507 | self.angles = [self.theta, self.phi, self.psi] 508 | def noisy_weights(self): 509 | """ 510 | Add guassian noise of stdev sigma to all the angles 511 | """ 512 | noisy_angles = [] 513 | for angle in self.angles[:-1]: 514 | device = th.device('cuda' if angle.is_cuda else 'cpu') 515 | noise = th.zeros_like(angle).to(device) 516 | noise.normal_() 517 | noisy_angles.append(angle + self.sigma_PS * noise) 518 | if self.use_psi: 519 | noise = th.zeros_like(self.psi) 520 | noise.normal_() 521 | noisy_angles.append(self.psi + self.sigma_PS * noise) 522 | else: 523 | noisy_angles.append(self.psi) 524 | 525 | return noisy_angles 526 | def get_UV(self): 527 | # If simulating PS noise 528 | if self.sigma_PS > 0: 529 | theta, phi, psi = self.noisy_weights() 530 | else: 531 | theta, phi, psi = self.angles 532 | 533 | # If simulating BS noise 534 | if self.sigma_BS > 0: 535 | noise_U = th.zeros_like(self.theta) 536 | noise_V = th.zeros_like(self.theta) 537 | noise_U.normal_() 538 | noise_V.normal_() 539 | 540 | if self.approx_sigma_bs: 541 | UV = UV_FFT(self.theta, self.phi) 542 | d_UV = UV_FFT(self.theta + pi, self.phi + pi, BS_noise=[noise_U, noise_V]) 543 | UV = [UV + 2**0.5 * self.sigma_BS * dUV for (UV, dUV) in zip(UV, d_UV)] 544 | else: 545 | UV = UV_FFT(self.theta, self.phi, BS_noise=[self.sigma_BS * noise_U, self.sigma_BS * noise_V], new=True) 546 | else: 547 | UV = UV_FFT(self.theta, self.phi) 548 | return UV, psi 549 | 550 | 551 | def forward(self, X): 552 | UV, psi = self.get_UV() 553 | # Iternate over the layers 554 | for n in range(self.P): 555 | uv = [x[n] for x in UV] 556 | _, perm = fft_idx(self.P, n) 557 | X = layer_mult_full(X, uv, perm) 558 | 559 | # Add final phase shift 560 | if self.use_psi: 561 | X_re = X[:, :self.D] 562 | X_im = X[:, self.D:] 563 | U_real = th.cos(psi) 564 | U_imag = th.sin(psi) 565 | Y_real = (U_real*X_re - U_imag*X_im) 566 | Y_imag = (U_real*X_im + U_imag*X_re) 567 | 568 | X = th.cat((Y_real, Y_imag), 1) 569 | return X 570 | def get_U(self, numpy=True, as_param=False): 571 | if as_param: 572 | U = self(th.eye(self.D * 2)).t() 573 | else: 574 | U = self(th.eye(self.D * 2)).data.t() 575 | U_re = U[:self.D, :self.D] 576 | U_im = U[self.D:, :self.D] 577 | if numpy: 578 | return np.matrix(U_re) + 1j * np.matrix(U_im) 579 | else: 580 | return U 581 | 582 | class StackedFFTUnitary(nn.Sequential): 583 | def __init__(self, D, n_stack=None, sigma_PS=0, sigma_BS=0): 584 | if n_stack is None: 585 | P = int(log2(D)) 586 | n_stack = int(D//P) 587 | layers = [FFTUnitary(D, sigma_PS=sigma_PS, sigma_BS=sigma_BS) for _ in range(n_stack)] 588 | 589 | super().__init__(*layers) 590 | self.sigma_PS = sigma_PS 591 | self.sigma_BS = sigma_BS 592 | @property 593 | def sigma_PS(self): 594 | return self._sigma_PS 595 | @property 596 | def sigma_BS(self): 597 | return self._sigma_BS 598 | @sigma_PS.setter 599 | def sigma_PS(self, new_sig): 600 | # Updates sigma of all layers 601 | for layer in self: 602 | layer.sigma_PS = new_sig 603 | self._sigma_PS = new_sig 604 | @sigma_BS.setter 605 | def sigma_BS(self, new_sig): 606 | # Updates sigma of all layers 607 | for layer in self: 608 | layer.sigma_BS = new_sig 609 | self._sigma_BS = new_sig 610 | 611 | class HybridUnitary(Unitary): 612 | def forward(self, X): 613 | if self.sigma_PS > 0: 614 | theta_A, phi_A, theta_B, phi_B, psi = self.noisy_weights() 615 | else: 616 | theta_A, phi_A, theta_B, phi_B, psi = self.angles 617 | 618 | UV_A = UV_MZ(self.D, theta_A, phi_A, 'A') 619 | UV_B = UV_MZ(self.D, theta_B, phi_B, 'B') 620 | 621 | if self.sigma_BS > 0: 622 | d_UV_A = UV_MZ(self.D, theta_A + pi, phi_A + pi, 'A') 623 | d_UV_B = UV_MZ(self.D, theta_B + pi, phi_B + pi, 'B') 624 | 625 | if self.static_BS: 626 | if (self.noise_BS_B != None) and (self.noise_BS_A != None): 627 | noise_A = self.noise_BS_A 628 | noise_B = self.noise_BS_B 629 | else: 630 | noise_A = th.zeros_like(d_UV_A[0]) 631 | noise_B = th.zeros_like(d_UV_B[0]) 632 | noise_A.normal_() 633 | noise_B.normal_() 634 | self.noise_BS_A = noise_A 635 | self.noise_BS_B = noise_B 636 | else: 637 | noise_A = th.zeros_like(d_UV_A[0]) 638 | noise_B = th.zeros_like(d_UV_B[0]) 639 | noise_A.normal_() 640 | noise_B.normal_() 641 | 642 | 643 | UV_A = [UV + self.sigma_BS * noise_A * dUV for (UV, dUV) in zip(UV_A, d_UV_A)] 644 | UV_B = [UV + self.sigma_BS * noise_B * dUV for (UV, dUV) in zip(UV_B, d_UV_B)] 645 | 646 | perm_A = perm_full(self.D, 'A') 647 | perm_B = perm_full(self.D, 'B') 648 | 649 | # Iternate over the layers 650 | num_layers_total = UV_A[0].shape[0] + UV_B[0].shape[0] 651 | 652 | n_fft_perms = np.log2(self.D) 653 | n_layers_btw = int(num_layers_total // (n_fft_perms + 1)) 654 | self.n_layers_btw = n_layers_btw 655 | for n in range(num_layers_total): 656 | if n % 2 == 0: 657 | uv = [w[n//2] for w in UV_A] 658 | perm = perm_A 659 | else: 660 | uv = [w[(n-1)//2] for w in UV_B] 661 | perm = perm_B 662 | X = layer_mult_full(X, uv, perm) 663 | 664 | if (n % n_layers_btw == 0) and (n != 0) and (n <= n_fft_perms * n_layers_btw): 665 | u = int(n // n_layers_btw) - 1 666 | idx = np.arange(X.shape[1]) 667 | fft_perm = idx ^ 2**u 668 | X = X[:, fft_perm] 669 | 670 | 671 | 672 | # Add final phase shift 673 | if self.use_psi: 674 | X_re = X[:, :self.D] 675 | X_im = X[:, self.D:] 676 | U_real = th.cos(psi) 677 | U_imag = th.sin(psi) 678 | Y_real = (U_real*X_re - U_imag*X_im) 679 | Y_imag = (U_real*X_im + U_imag*X_re) 680 | 681 | X = th.cat((Y_real, Y_imag), 1) 682 | 683 | return X 684 | 685 | class CGRDUnitary(Unitary): 686 | @staticmethod 687 | def get_perms(N): 688 | n_perm = int(np.ceil(np.log2(N))) 689 | perm_loc = (N / n_perm) * np.arange(n_perm) 690 | perm_depth = N ** (np.arange(n_perm)/n_perm) 691 | perm_depth[0] = 0 692 | 693 | # Ensure they are divisible by 2 694 | perm_loc = (perm_loc // 2 * 2).astype(int) 695 | perm_depth = (np.ceil(perm_depth/2) * 2).astype(int) 696 | 697 | # Define even and odd permutations 698 | perm_A = np.arange(N) 699 | perm_B = np.arange(N) 700 | N_2 = N//2 * 2 701 | perm_A[:N_2:2] += 1 702 | perm_A[1:N_2:2] -= 1 703 | perm_B[1:N_2-1:2] += 1 704 | perm_B[2:N_2-1:2] -= 1 705 | 706 | perms = [] 707 | perm = np.arange(N) 708 | perm_idx = 0 709 | 710 | for i in range(N): 711 | if perm_idx == n_perm: 712 | break 713 | if i == perm_depth[perm_idx]: 714 | perms.append(perm.tolist()) 715 | perm_idx += 1 716 | 717 | permutation = perm_A if i%2 ==0 else perm_B 718 | perm[permutation] = perm.copy() 719 | return perm_loc[1:], perms[1:] 720 | def __init__(self, *args, **kwargs): 721 | super().__init__(*args, **kwargs) 722 | perm_loc, perms = self.get_perms(self.D) 723 | self.perm_dict = {} 724 | for loc, perm in zip(perm_loc, perms): 725 | self.perm_dict[loc] = perm + [x + self.D for x in perm] 726 | def forward(self, X): 727 | if self.sigma_PS > 0: 728 | theta_A, phi_A, theta_B, phi_B, psi = self.noisy_weights() 729 | else: 730 | theta_A, phi_A, theta_B, phi_B, psi = self.angles 731 | 732 | UV_A = UV_MZ(self.D, theta_A, phi_A, 'A') 733 | UV_B = UV_MZ(self.D, theta_B, phi_B, 'B') 734 | 735 | if self.sigma_BS > 0: 736 | d_UV_A = UV_MZ(self.D, theta_A + pi, phi_A + pi, 'A') 737 | d_UV_B = UV_MZ(self.D, theta_B + pi, phi_B + pi, 'B') 738 | 739 | if self.static_BS: 740 | if (self.noise_BS_B is not None) and (self.noise_BS_A is not None): 741 | noise_A = self.noise_BS_A 742 | noise_B = self.noise_BS_B 743 | else: 744 | noise_A = th.zeros_like(d_UV_A[0]) 745 | noise_B = th.zeros_like(d_UV_B[0]) 746 | noise_A.normal_() 747 | noise_B.normal_() 748 | self.noise_BS_A = noise_A 749 | self.noise_BS_B = noise_B 750 | else: 751 | noise_A = th.zeros_like(d_UV_A[0]) 752 | noise_B = th.zeros_like(d_UV_B[0]) 753 | noise_A.normal_() 754 | noise_B.normal_() 755 | 756 | 757 | UV_A = [UV + self.sigma_BS * noise_A * dUV for (UV, dUV) in zip(UV_A, d_UV_A)] 758 | UV_B = [UV + self.sigma_BS * noise_B * dUV for (UV, dUV) in zip(UV_B, d_UV_B)] 759 | 760 | perm_A = perm_full(self.D, 'A') 761 | perm_B = perm_full(self.D, 'B') 762 | 763 | # Iternate over the layers 764 | num_layers_total = UV_A[0].shape[0] + UV_B[0].shape[0] 765 | #for n in range(self.D): 766 | for n in range(num_layers_total): 767 | if n in self.perm_dict: 768 | idx = self.perm_dict[n] 769 | X = X[:, idx] 770 | if n % 2 == 0: 771 | uv = [w[n//2] for w in UV_A] 772 | perm = perm_A 773 | else: 774 | uv = [w[(n-1)//2] for w in UV_B] 775 | perm = perm_B 776 | X = layer_mult_full(X, uv, perm) 777 | 778 | # Add final phase shift 779 | if self.use_psi: 780 | X_re = X[:, :self.D] 781 | X_im = X[:, self.D:] 782 | U_real = th.cos(psi) 783 | U_imag = th.sin(psi) 784 | Y_real = (U_real*X_re - U_imag*X_im) 785 | Y_imag = (U_real*X_im + U_imag*X_re) 786 | 787 | X = th.cat((Y_real, Y_imag), 1) 788 | 789 | return X 790 | 791 | class StackedFFTUnitary(nn.Sequential): 792 | def __init__(self, D, n_stack=1, sigma_PS=0, sigma_BS=0): 793 | layers = [FFTUnitary(D, sigma_PS=sigma_PS, sigma_BS=sigma_BS) for _ in range(n_stack)] 794 | self.D = D 795 | super().__init__(*layers) 796 | self.sigma_PS = sigma_PS 797 | self.sigma_BS = sigma_BS 798 | @property 799 | def sigma_PS(self): 800 | return self._sigma_PS 801 | @property 802 | def sigma_BS(self): 803 | return self._sigma_BS 804 | @sigma_PS.setter 805 | def sigma_PS(self, new_sig): 806 | # Updates sigma of all layers 807 | for layer in self: 808 | layer.sigma_PS = new_sig 809 | self._sigma_PS = new_sig 810 | @sigma_BS.setter 811 | def sigma_BS(self, new_sig): 812 | # Updates sigma of all layers 813 | for layer in self: 814 | layer.sigma_BS = new_sig 815 | self._sigma_BS = new_sig 816 | def get_U(self, numpy=True): 817 | U = self(th.eye(self.D * 2)).data.t() 818 | U_re = U[:self.D, :self.D] 819 | U_im = U[self.D:, :self.D] 820 | if numpy: 821 | return np.matrix(U_re) + 1j * np.matrix(U_im) 822 | else: 823 | return U 824 | 825 | """ Diagonal Module """ 826 | class Diagonal(nn.Module): 827 | def __init__(self, D_in, D_out, sigma=0): 828 | super().__init__() 829 | self.D_in = D_in 830 | self.D_out = D_out 831 | self.D_min = min(D_in, D_out) 832 | self.sigma = sigma 833 | 834 | self.init_params() 835 | def init_params(self): 836 | sin_theta = th.rand(self.D_min) 837 | 838 | self.theta = Parameter(th.asin(sin_theta)) 839 | # Physically the amplification should happen at the nonlinearity but formally, it's cleaner here 840 | self.amp = Parameter(th.tensor(6.)) 841 | def forward(self, X): 842 | N, D2 = X.shape 843 | assert D2 % 2 == 0 844 | D = D2//2 845 | assert D == self.D_in 846 | 847 | noise = th.zeros_like(self.theta) 848 | noise.normal_() 849 | theta = self.theta + self.sigma*noise 850 | 851 | u = th.sin(theta/2) 852 | device = th.device('cuda' if X.is_cuda else 'cpu') 853 | Y = th.zeros(N, 2 * self.D_out).to(device) 854 | Y[:, :self.D_min] = u * X[:, :self.D_min] 855 | Y[:, self.D_out:self.D_out+self.D_min] = u * X[:, D:D+self.D_min] 856 | return self.amp * Y 857 | 858 | """ Linear Modules """ 859 | class Linear(nn.Module): 860 | @classmethod 861 | def from_M(cls, M, sigma_PS=0, sigma_BS=0, UNet=Unitary, numpy=False): 862 | if not numpy: 863 | Do2, Di2 = M.shape 864 | 865 | assert (Do2%2==0) and (Di2%2==0) 866 | Di = Di2//2 867 | Do = Do2//2 868 | M_re = M[:Do, :Di].numpy() 869 | M_im = M[Do:, :Di].numpy() 870 | M = np.matrix(M_re + 1j*M_im) 871 | else: 872 | Do, Di = M.shape 873 | 874 | net = cls(Di, Do, sigma_PS, sigma_BS, UNet) 875 | net.emul_M(M, numpy=True) 876 | return net 877 | def __init__(self, D_in, D_out, sigma_PS=0, sigma_BS=0, UNet=Unitary, FFT_shaper=False): 878 | super().__init__() 879 | self.D_in = D_in 880 | self.D_out = D_out 881 | 882 | # Define reshapers used for FFTNet 883 | self.in_shaper = self.out_shaper = None 884 | 885 | self.UNet = UNet 886 | 887 | if UNet == FFTUnitary: 888 | FFT_shaper = True 889 | if FFT_shaper: 890 | # Initialize the in/out shapers and obtain closest power of 2 dims 891 | D_in, D_out = self.init_fft() 892 | 893 | # SVD Decomp 894 | self.VH = UNet(D_in, sigma_PS=sigma_PS, sigma_BS=sigma_BS) 895 | self.S = Diagonal(D_in, D_out, sigma=sigma_PS) 896 | self.U = UNet(D_out, sigma_PS=sigma_PS, sigma_BS=sigma_BS) 897 | self.sigma_PS = sigma_PS 898 | self.sigma_BS = sigma_BS 899 | @property 900 | def sigma_PS(self): 901 | return self._sigma_PS 902 | @property 903 | def sigma_BS(self): 904 | return self._sigma_BS 905 | @sigma_PS.setter 906 | def sigma_PS(self, new_sig): 907 | # Updates sigma of all layers 908 | self.U.sigma_PS = new_sig 909 | self.VH.sigma_PS = new_sig 910 | self.S.sigma_PS = new_sig 911 | self._sigma_PS = new_sig 912 | @sigma_BS.setter 913 | def sigma_BS(self, new_sig): 914 | # Updates sigma of all layers 915 | self.U.sigma_BS = new_sig 916 | self.VH.sigma_BS = new_sig 917 | self.S.sigma_BS = new_sig 918 | self._sigma_BS = new_sig 919 | def init_fft(self): 920 | Di, Do = self.D_in, self.D_out 921 | if (Di - 1) & Di != 0: 922 | self.in_shaper = FFTShaper(Di, 'in') 923 | Di = self.in_shaper.D_pow_2 924 | if (Do - 1) & Do != 0: 925 | self.out_shaper = FFTShaper(Do, 'out') 926 | Do = self.out_shaper.D_pow_2 927 | return Di, Do 928 | def forward(self, X): 929 | #Make X a "complex vector" if not already 930 | N, D = X.shape 931 | if D == self.D_in: 932 | X = ct.make_batched_vec(X) 933 | else: 934 | assert D == self.D_in*2 935 | 936 | # Reshape input if needed for FFT 937 | if self.in_shaper is not None: 938 | X = self.in_shaper(X) 939 | 940 | X = self.VH(X) 941 | X = self.S(X) 942 | X = self.U(X) 943 | 944 | # Reshape output if needed for FFT 945 | if self.out_shaper is not None: 946 | X = self.out_shaper(X) 947 | return X 948 | def emul_M(self, M, numpy=False, rand_S=True): 949 | if self.UNet != Unitary: 950 | raise Exception('Decomposition of arbitrary matrices is only supported with GridUnitary.') 951 | if not numpy: 952 | Do2, Di2 = M.shape 953 | assert (Do2%2==0) and (Di2%2==0) 954 | Di = Di2//2 955 | Do = Do2//2 956 | M_re = M[:Do, :Di].numpy() 957 | M_im = M[Do:, :Di].numpy() 958 | M = np.matrix(M_re + 1j*M_im) 959 | else: 960 | Do, Di = M.shape 961 | 962 | U, S, VH = svd(M, rand_S=rand_S) 963 | 964 | S = th.tensor(S) 965 | amp = S.max() 966 | theta_diag = 2 * th.asin(S/amp) 967 | 968 | self.VH.emul_U(VH, True) 969 | self.U.emul_U(U, True) 970 | 971 | self.S = Diagonal(Di, Do) 972 | self.S.theta.data = theta_diag 973 | self.S.amp.data = amp 974 | def get_M(self, numpy=True): 975 | U = self(th.eye(self.D_in * 2)).data.t() 976 | U_re = U[:self.D_out, :self.D_in] 977 | U_im = U[self.D_out:, :self.D_in] 978 | if numpy: 979 | return np.matrix(U_re) + 1j * np.matrix(U_im) 980 | else: 981 | return U 982 | 983 | class TruncatedGridLinear(Linear): 984 | def __init__(self, D_in, D_out, sigma_PS=0, sigma_BS=0): 985 | super().__init__(D_in, D_out, sigma_PS, sigma_BS, UNet=Unitary) 986 | 987 | # Initialize the in/out shapers and obtain closest power of 2 dims 988 | D_in, D_out = self.init_fft() 989 | P_in = int(log2(D_in)) 990 | P_out = int(log2(D_out)) 991 | 992 | U_in = Unitary.truncated(P_in) 993 | U_out = Unitary.truncated(P_out) 994 | 995 | # SVD Decomp 996 | self.VH = U_in(D_in, sigma_PS=sigma_PS, sigma_BS=sigma_BS) 997 | self.S = Diagonal(D_in, D_out, sigma=sigma_PS) 998 | self.U = U_out(D_out, sigma_PS=sigma_PS, sigma_BS=sigma_BS) 999 | self.sigma_PS = sigma_PS 1000 | self.sigma_BS = sigma_BS 1001 | 1002 | class ComplexLinear(nn.Module): 1003 | def __init__(self, D_in, D_out, sigma=0, has_bias=False): 1004 | super().__init__() 1005 | self.D_in = D_in 1006 | self.D_out = D_out 1007 | self.has_bias = has_bias 1008 | self.init_params() 1009 | self.sigma = sigma 1010 | def init_params(self): 1011 | U = ct.rand_unitary(self.D_out) 1012 | S = th.zeros(self.D_out, self.D_in) 1013 | VH = ct.rand_unitary(self.D_in) 1014 | 1015 | sigma = 1/(self.D_in + self.D_out) ** 0.5 1016 | sigma *= 20 1017 | if self.D_out < self.D_in: 1018 | diag = th.randn(self.D_out) * sigma 1019 | S[:, :self.D_out] = th.diag(diag) 1020 | else: 1021 | diag = th.randn(self.D_in) * sigma 1022 | S[:self.D_in, :] = th.diag(diag) 1023 | S = ct.make_complex_matrix(S) 1024 | 1025 | M = (U@S@VH) 1026 | self.M_real = Parameter(M[:self.D_out, :self.D_in]) 1027 | self.M_imag = Parameter(M[self.D_out:, :self.D_in]) 1028 | 1029 | if self.has_bias: 1030 | self.bias = Parameter(th.Tensor(D_out*2)) 1031 | self.bias.data.uniform_(-sigma, sigma) 1032 | else: 1033 | self.register_parameter('bias', None) 1034 | @property 1035 | def weight(self): 1036 | return ct.make_complex_matrix(self.M_real, self.M_imag) 1037 | def set_weight(self, M): 1038 | self.M_real.data = M[:self.D_out, :self.D_in] 1039 | self.M_imag.data = M[self.D_out:, :self.D_in] 1040 | def forward(self, X): 1041 | if self.sigma > 0: 1042 | device = th.device('cuda' if self.weight.is_cuda else 'cpu') 1043 | noise = th.zeros_like(self.weight).to(device) 1044 | noise.normal_() 1045 | weight = self.weight + noise * self.sigma 1046 | else: 1047 | weight = self.weight 1048 | 1049 | return F.linear(X, weight, self.bias) 1050 | def get_M(self, numpy=True): 1051 | U = self(th.eye(self.D_in * 2)).data.t() 1052 | U_re = U[:self.D_out, :self.D_in] 1053 | U_im = U[self.D_out:, :self.D_in] 1054 | if numpy: 1055 | return np.matrix(U_re) + 1j * np.matrix(U_im) 1056 | else: 1057 | return U 1058 | 1059 | """ Nonlinearities """ 1060 | class ModNonlinearity(nn.Module): 1061 | def __init__(self, f=None): 1062 | """ 1063 | Impliments nonlinearity that acts on the magnitude of a complex vector, leaving the phase the same. 1064 | 1065 | f : the nonlinearity to be used. Should be from torch.nn.functional for backprop to work 1066 | """ 1067 | if f is None: 1068 | f = ShiftedSoftplus(0.1) 1069 | 1070 | super().__init__() 1071 | self.f = f 1072 | def forward(self, Z): 1073 | _, D = Z.shape 1074 | # Z should be already a complex vector 1075 | assert D % 2 == 0 1076 | 1077 | X = Z[:, :D//2] 1078 | Y = Z[:, D//2:] 1079 | 1080 | Z_abs = ct.norm_squared(Z)**0.5 1081 | W = self.f(Z_abs) 1082 | U = X * W / Z_abs 1083 | V = Y * W / Z_abs 1084 | 1085 | out = ct.make_batched_vec(U, V) 1086 | return out 1087 | 1088 | class ComplexNorm(nn.Module): 1089 | def __init__(self): 1090 | super().__init__() 1091 | def forward(self, X): 1092 | return ct.norm_squared(X) 1093 | 1094 | class ShiftedSoftplus(nn.Module): 1095 | def __init__(self, T): 1096 | super().__init__() 1097 | # Calculate bias 1098 | self.u0 = 0.5 * log(T**(-1) - 1) 1099 | def forward(self, X): 1100 | return 0.5 * (F.softplus(2 * (X - self.u0)) - log(1 + exp(-2*self.u0))) 1101 | 1102 | """ Other """ 1103 | class NoisySequential(nn.Sequential): 1104 | def __init__(self, *layers): 1105 | super().__init__(*layers) 1106 | def set_noise(self, PS, BS): 1107 | for l in self: 1108 | if isinstance(l, Linear): 1109 | l.sigma_BS = BS 1110 | l.sigma_PS = PS 1111 | 1112 | if __name__ == '__main__': 1113 | D = 4 1114 | X = th.randn(1, D * 2) 1115 | net = Unitary(D, sigma_BS=1e-2, approx_sigma_bs=False) 1116 | U = net.get_U() 1117 | print(U @ U.H) 1118 | -------------------------------------------------------------------------------- /plot_accuracies.py: -------------------------------------------------------------------------------- 1 | import matplotlib 2 | matplotlib.use("TkAgg") 3 | import matplotlib.pylab as plt 4 | import numpy as np 5 | from scipy.ndimage import uniform_filter 6 | from scipy import stats 7 | from train_mnist import * 8 | from default_params import * 9 | 10 | def compare_plot(accs, names, log_plot=False, f_name=None, q='Accuracy', acc_loss=False, colors=None): 11 | if acc_loss: 12 | accs = [a - a[0, 0] for a in accs] 13 | plt.ylabel('Change in Accuracy', fontsize=16) 14 | else: 15 | plt.ylabel('Classification Accuracy', fontsize=16) 16 | if log_plot: 17 | plot = plt.semilogy 18 | else: 19 | plot = plt.plot 20 | 21 | sigs = [] 22 | for acc in accs: 23 | sigs.append( 24 | np.linspace(0, 0.02, acc.shape[1]) 25 | ) 26 | 27 | if colors is None: 28 | colors = ['r', 'b', 'g', 'purple', 'orange'] 29 | for n in range(len(accs)): 30 | acc = accs[n] 31 | name = names[n] 32 | sig = sigs[n] 33 | c = colors[n] 34 | med = np.quantile(acc, 0.5, axis=0) 35 | high = np.quantile(acc, 0.8, axis=0) 36 | low = np.quantile(acc, 0.2, axis=0) 37 | plot(sig, med, color=c, label=f'Median {q} ({name})') 38 | plot(sig, high, color=c, linestyle='--', label=f'20/80% Quantile ({name})') 39 | plot(sig, low, color=c, linestyle='--') 40 | plt.fill_between(sig, low, high, color=c, alpha=.2) 41 | 42 | plt.xlim([0, 0.02]) 43 | plt.xticks([0, 0.005, 0.01, 0.015, 0.02], fontsize=14) 44 | ax = plt.gca() 45 | ax.set_xticks(np.arange(0, 0.02, 0.0025), minor=True) 46 | plt.yticks( fontsize=14) 47 | plt.xlabel(r'Component Error, $\sigma_{PS} = \sigma_{BS}$', fontsize=16) 48 | plt.grid(linestyle=':', which='both') 49 | plt.legend(fontsize=14) 50 | 51 | 52 | plt.tight_layout() 53 | if f_name: 54 | plt.savefig(f_name) 55 | def plot_psbs_acc(accuracies): 56 | mean = accuracies[:, :, ::-1].mean(0).T 57 | mean = accuracies.mean(0).T 58 | extent=[0, NOISY_TEST_MAX, 0.00, NOISY_TEST_MAX] 59 | plt.imshow(mean, extent=extent, cmap='inferno', origin='lower') 60 | cbar = plt.colorbar() 61 | ctr = plt.contour(uniform_filter(mean, 3), colors='white', extent=extent, linewidths=4) 62 | cbar.add_lines(ctr) 63 | 64 | plt.xticks([0, 0.005, 0.01, 0.015, 0.02], fontsize=14) 65 | plt.yticks([0, 0.005, 0.01, 0.015, 0.02], fontsize=14) 66 | plt.ylabel(r'Beamsplitter Error ($\sigma_{BS}$)', fontsize=16) 67 | plt.xlabel(r'Phaseshifter Error ($\sigma_{PS}$)', fontsize=16) 68 | #plt.title('Classification Accuracy of Noisy Grid Network', fontsize=18, y=1.08) 69 | plt.tight_layout() 70 | 71 | def plot_fft_psbs(f=F_FIG_FFT_PSBS): 72 | plt.cla() 73 | plt.clf() 74 | accuracies = np.load(F_FFT_ACC_PSBS) 75 | plot_psbs_acc(accuracies) 76 | plt.plot([0, 0.02], [0, 0.02], 'r--', linewidth=2) 77 | plt.savefig(f) 78 | def plot_grid_psbs(f=F_FIG_GRID_PSBS): 79 | plt.cla() 80 | plt.clf() 81 | accuracies = np.load(F_GRID_ACC_PSBS) 82 | plot_psbs_acc(accuracies) 83 | plt.plot([0, 0.02], [0, 0.02], 'b--', linewidth=2) 84 | plt.savefig(f) 85 | def plot_grid_vs_fft(): 86 | plt.cla() 87 | plt.clf() 88 | acc_grid = np.load(F_GRID_ACC_DIAG) 89 | acc_fft = np.load(F_FFT_ACC_DIAG) 90 | compare_plot([acc_grid, acc_fft], ['GridNet', 'FFTNet']) 91 | plt.savefig(F_FIG_COMPARE_GRID_FFT) 92 | def plot_rand_vs_ordered(): 93 | plt.cla() 94 | plt.clf() 95 | acc_rand = np.load(F_GRID_ACC_DIAG) 96 | acc_order = np.load(F_GRID_ORD_ACC_DIAG) 97 | compare_plot([acc_rand, acc_order], ['Random SV', 'Ordered SV']) 98 | plt.savefig(F_FIG_RAND_VS_ORD) 99 | def plot_compare_fft_diff_nh(): 100 | plt.cla() 101 | plt.clf() 102 | accs = [] 103 | names = [] 104 | dir = os.path.join(DIR_NOISY_TEST, 'fft_net_diff_nh') 105 | files = os.listdir(dir) 106 | files = sorted(files, key=lambda f_name:int(f_name[:-4]), reverse=True) 107 | print(files) 108 | for f in files: 109 | f_path = os.path.join(dir, f) 110 | accs.append( 111 | np.load(f_path) 112 | ) 113 | names.append( 114 | f'FFTNet, D={f[:-4]}' 115 | ) 116 | compare_plot(accs, names) 117 | def plot_fft_diff_depth(): 118 | plt.cla() 119 | plt.clf() 120 | accs = [] 121 | names = [] 122 | dir = os.path.join(DIR_RESULTS, 'fft_net_diff_depth') 123 | files = os.listdir(dir) 124 | files = sorted(files, key=lambda f_name:int(f_name[:-4]), reverse=True) 125 | print(files) 126 | for f in files: 127 | f_path = os.path.join(dir, f) 128 | accs.append( 129 | np.load(f_path) 130 | ) 131 | names.append( 132 | f'FFTNet, D={f[:-4]}' 133 | ) 134 | compare_plot(accs, names) 135 | def truncated_vs_fft(): 136 | f_out_trunc = os.path.join(DIR_RESULTS, 'trunc_fid.npy') 137 | f_out_fft = os.path.join(DIR_RESULTS, 'fft_fid.npy') 138 | 139 | fid_trunc = np.load(f_out_trunc) 140 | fid_fft = np.load(f_out_fft) 141 | 142 | compare_plot([fid_trunc, fid_fft], ['TruncGrid', 'FFTUnitary'], acc_loss=False, q='Fidelity', colors=[(1, .3, 0), 'blue']) 143 | plt.ylabel('Fidelity') 144 | plt.savefig(os.path.join(DIR_FIGS, 'truncated_vs_fft.pdf')) 145 | plt.show() 146 | def stacked_vs_grid(): 147 | f_out_grid = os.path.join(DIR_RESULTS, 'unitary_fidelity.npy') 148 | f_out_fft_32 = os.path.join(DIR_RESULTS, 'stacked_fft_32_fid.npy') 149 | 150 | fid_grid = np.load(f_out_grid) 151 | fid_fft_32 = np.load(f_out_fft_32) 152 | 153 | compare_plot([fid_grid, fid_fft_32], ['GridUnitary', 'StackedFFT'], acc_loss=False, q='Fidelity', colors=['red', 'green']) 154 | plt.ylabel('Fidelity') 155 | plt.savefig(os.path.join(DIR_FIGS, 'stacked_vs_grid_fidelity.pdf')) 156 | plt.show() 157 | 158 | if __name__ == '__main__': 159 | 160 | plot_rand_vs_ordered() 161 | 162 | assert False 163 | acc_grid = np.load(F_GRID_ACC_DIAG) 164 | acc_stacked_fft = np.load( 165 | os.path.join(DIR_NOISY_TEST, 'stacked_fft_diag.npy') 166 | ) 167 | acc_layer_1 = np.load( 168 | os.path.join(DIR_NOISY_TEST, 'grid_1_layer.npy') 169 | ) 170 | acc_stacked_1 = np.load( 171 | os.path.join(DIR_NOISY_TEST, 'stacked_fft_1.npy') 172 | ) 173 | compare_plot([acc_grid, acc_stacked_fft], ['GridNet', 'StackedFFT-Net'], acc_loss=False, colors=['red', 'green']) 174 | plt.savefig(os.path.join(DIR_FIGS, 'stacked_vs_grid_accuracy.pdf')) 175 | plt.show() 176 | 177 | assert False 178 | 179 | acc_grid = np.load(F_GRID_ACC_DIAG) 180 | #acc_grid_ord = np.load(F_GRID_ORD_ACC_DIAG) 181 | #acc_fft_ = np.load(F_FFT_ACC_DIAG + '_') 182 | acc_fft = np.load(F_FFT_ACC_PSBS) 183 | acc_fft = acc_fft[:, np.arange(21), np.arange(21)] 184 | #acc_fft = np.array([np.diag(acc) for acc in acc_grid]) 185 | 186 | #acc_fft = np.load(F_FFT_ACC_DIAG) 187 | sigs = NOISY_TEST_SIGMAS 188 | compare_plot(acc_grid, acc_fft, 'FFT_', 'FFT') 189 | plt.show() 190 | 191 | assert False 192 | f_name = './figures/acc_compare.pdf' 193 | acc_grid = np.load('./noisy_grid_infr_256/accuracies.npy') 194 | acc_grid = np.array([ 195 | np.diag(acc) for acc in acc_grid 196 | ]) 197 | acc_fft = np.load('./noisy_fft_93/accuracies.npy') 198 | acc_fft = np.array([ 199 | np.diag(acc) for acc in acc_fft 200 | ]) 201 | sigs_1 = np.linspace(0, 0.02, 20).tolist() 202 | sigs_2 = np.linspace(0, 0.02, 21).tolist() 203 | 204 | acc_rand = np.load('./noisy_grid_infr_256/shuffled_accuracies.npy') 205 | acc_hyb = np.load('hybrid_accuracies.npy') 206 | acc_grid_2 = np.load('./results/noisy_test/grid_net_diag.npy') 207 | 208 | #compare_plot(acc_1, acc_2, False, f_name='./figures/acc_compare.pdf') 209 | #compare_plot(acc_rand, acc_grid, 'Shuffled', 'Ordered', False, f_name='./figures/compare_permute.pdf', q='Acc. Loss', acc_loss=True) 210 | #compare_plot(acc_rand, acc_hyb, 'Grid', 'BlockFFT', False, f_name='./figures/compare_hybrid.pdf', q='Acc. Loss', acc_loss=True) 211 | compare_plot(acc_rand, acc_grid_2, 'Grid', 'Grid_2', False, f_name='./figures/compare_grid.pdf', q='Accuracy') 212 | #compare_plot(acc_1, acc_2, 'Grid', 'BlockFFT', False, f_name='./figures/compare_hybrid.png') 213 | plt.ylabel('Accuracy Loss', fontsize=14) 214 | plt.legend(fontsize=14) 215 | plt.savefig('./figures/compare_hybrid.pdf') 216 | #plt.ylim([0.6, 1]) 217 | plt.show() 218 | assert False 219 | err_1, err_fit, m, b = get_fit_err(sigs_1, acc_1) 220 | err_2, err_fit2, m2, b2 = get_fit_err(sigs_2, acc_2) 221 | compare_plot(err_1, err_2, sigs_1, sigs_2, log_plot=True, f_name=None) 222 | label = r'$\epsilon \approx %.3f \times e^{ %d \sigma }$' % (np.exp(b), m) 223 | plt.plot(sigs_1, err_fit, 'm.', label=label) 224 | label = r'$\epsilon \approx %.3f \times e^{ %d \sigma }$' % (np.exp(b2), m2) 225 | plt.plot(sigs_2, err_fit2, 'c.', label=label) 226 | 227 | plt.show() 228 | -------------------------------------------------------------------------------- /results/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mike-fang/imprecise_optical_neural_network/2ed452fb4b6fd6105ff45bd6a60483f940246f09/results/.DS_Store -------------------------------------------------------------------------------- /results/fft_fid.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mike-fang/imprecise_optical_neural_network/2ed452fb4b6fd6105ff45bd6a60483f940246f09/results/fft_fid.npy -------------------------------------------------------------------------------- /results/fft_net_diff_depth/1.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mike-fang/imprecise_optical_neural_network/2ed452fb4b6fd6105ff45bd6a60483f940246f09/results/fft_net_diff_depth/1.npy -------------------------------------------------------------------------------- /results/fft_net_diff_depth/2.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mike-fang/imprecise_optical_neural_network/2ed452fb4b6fd6105ff45bd6a60483f940246f09/results/fft_net_diff_depth/2.npy -------------------------------------------------------------------------------- /results/fft_net_diff_depth/3.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mike-fang/imprecise_optical_neural_network/2ed452fb4b6fd6105ff45bd6a60483f940246f09/results/fft_net_diff_depth/3.npy -------------------------------------------------------------------------------- /results/fft_net_diff_depth/4.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mike-fang/imprecise_optical_neural_network/2ed452fb4b6fd6105ff45bd6a60483f940246f09/results/fft_net_diff_depth/4.npy -------------------------------------------------------------------------------- /results/localized_noise/U.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mike-fang/imprecise_optical_neural_network/2ed452fb4b6fd6105ff45bd6a60483f940246f09/results/localized_noise/U.npy -------------------------------------------------------------------------------- /results/localized_noise/U_fft.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mike-fang/imprecise_optical_neural_network/2ed452fb4b6fd6105ff45bd6a60483f940246f09/results/localized_noise/U_fft.npy -------------------------------------------------------------------------------- /results/localized_noise/U_ord.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mike-fang/imprecise_optical_neural_network/2ed452fb4b6fd6105ff45bd6a60483f940246f09/results/localized_noise/U_ord.npy -------------------------------------------------------------------------------- /results/localized_noise/V.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mike-fang/imprecise_optical_neural_network/2ed452fb4b6fd6105ff45bd6a60483f940246f09/results/localized_noise/V.npy -------------------------------------------------------------------------------- /results/localized_noise/V_fft.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mike-fang/imprecise_optical_neural_network/2ed452fb4b6fd6105ff45bd6a60483f940246f09/results/localized_noise/V_fft.npy -------------------------------------------------------------------------------- /results/localized_noise/V_ord.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mike-fang/imprecise_optical_neural_network/2ed452fb4b6fd6105ff45bd6a60483f940246f09/results/localized_noise/V_ord.npy -------------------------------------------------------------------------------- /results/noisy_test/FFTNet_diag.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mike-fang/imprecise_optical_neural_network/2ed452fb4b6fd6105ff45bd6a60483f940246f09/results/noisy_test/FFTNet_diag.npy -------------------------------------------------------------------------------- /results/noisy_test/FFTNet_psbs.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mike-fang/imprecise_optical_neural_network/2ed452fb4b6fd6105ff45bd6a60483f940246f09/results/noisy_test/FFTNet_psbs.npy -------------------------------------------------------------------------------- /results/noisy_test/GridNet_diag.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mike-fang/imprecise_optical_neural_network/2ed452fb4b6fd6105ff45bd6a60483f940246f09/results/noisy_test/GridNet_diag.npy -------------------------------------------------------------------------------- /results/noisy_test/GridNet_ordered_SV_diag.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mike-fang/imprecise_optical_neural_network/2ed452fb4b6fd6105ff45bd6a60483f940246f09/results/noisy_test/GridNet_ordered_SV_diag.npy -------------------------------------------------------------------------------- /results/noisy_test/GridNet_psbs.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mike-fang/imprecise_optical_neural_network/2ed452fb4b6fd6105ff45bd6a60483f940246f09/results/noisy_test/GridNet_psbs.npy -------------------------------------------------------------------------------- /results/noisy_test/fft_net_diff_nh/1024.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mike-fang/imprecise_optical_neural_network/2ed452fb4b6fd6105ff45bd6a60483f940246f09/results/noisy_test/fft_net_diff_nh/1024.npy -------------------------------------------------------------------------------- /results/noisy_test/fft_net_diff_nh/256.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mike-fang/imprecise_optical_neural_network/2ed452fb4b6fd6105ff45bd6a60483f940246f09/results/noisy_test/fft_net_diff_nh/256.npy -------------------------------------------------------------------------------- /results/noisy_test/fft_net_diff_nh/4096.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mike-fang/imprecise_optical_neural_network/2ed452fb4b6fd6105ff45bd6a60483f940246f09/results/noisy_test/fft_net_diff_nh/4096.npy -------------------------------------------------------------------------------- /results/noisy_test/grid_1_layer.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mike-fang/imprecise_optical_neural_network/2ed452fb4b6fd6105ff45bd6a60483f940246f09/results/noisy_test/grid_1_layer.npy -------------------------------------------------------------------------------- /results/noisy_test/stacked_fft_1.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mike-fang/imprecise_optical_neural_network/2ed452fb4b6fd6105ff45bd6a60483f940246f09/results/noisy_test/stacked_fft_1.npy -------------------------------------------------------------------------------- /results/noisy_test/stacked_fft_diag.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mike-fang/imprecise_optical_neural_network/2ed452fb4b6fd6105ff45bd6a60483f940246f09/results/noisy_test/stacked_fft_diag.npy -------------------------------------------------------------------------------- /results/stacked_fft_32_fid.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mike-fang/imprecise_optical_neural_network/2ed452fb4b6fd6105ff45bd6a60483f940246f09/results/stacked_fft_32_fid.npy -------------------------------------------------------------------------------- /results/trunc_fid.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mike-fang/imprecise_optical_neural_network/2ed452fb4b6fd6105ff45bd6a60483f940246f09/results/trunc_fid.npy -------------------------------------------------------------------------------- /results/unitary_fidelity.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mike-fang/imprecise_optical_neural_network/2ed452fb4b6fd6105ff45bd6a60483f940246f09/results/unitary_fidelity.npy -------------------------------------------------------------------------------- /train_mnist.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch as th 3 | import matplotlib.pylab as plt 4 | from optical_nn import * 5 | import complex_torch_var as ct 6 | from mnist import * 7 | import os 8 | from time import time 9 | from functools import partial 10 | from glob import glob 11 | from default_params import * 12 | 13 | DIR_PATH = os.path.dirname(os.path.realpath(__file__)) 14 | 15 | # Good learning rates for different networks 16 | LR_FFT = 5e-2 17 | LR_GRID = 2.5e-4 18 | LR_COMPLEX = 5e-3 19 | 20 | def train_complex(f=F_COMPLEX_TRAIN, n_h=[256, 256]): 21 | train_params = {} 22 | train_params['n_epochs'] = 5 23 | train_params['log_interval'] = 100 24 | train_params['batch_size'] = 100 25 | 26 | optim_params = {} 27 | optim_params['lr'] = 5e-3 28 | optim_params['momentum'] = .9 29 | 30 | net = mnist_complex(hidden_units=n_h) 31 | print(net) 32 | train(net, **train_params, optim_params=optim_params) 33 | optim_params['lr'] /= 5 34 | train(net, **train_params, optim_params=optim_params) 35 | acc = get_acc(net) 36 | print(f'Trained ComplexNet with accuracy {acc}.') 37 | if f: 38 | th.save(net.state_dict(), f) 39 | print(f'Saved model to {f}.') 40 | def train_cgrd(f=F_CGRD_TRAIN): 41 | train_params = {} 42 | train_params['n_epochs'] = 5 43 | train_params['log_interval'] = 100 44 | train_params['batch_size'] = 100 45 | 46 | optim_params = {} 47 | optim_params['lr'] = LR_GRID 48 | optim_params['momentum'] = .9 49 | 50 | net = mnist_ONN(unitary=CGRDUnitary) 51 | if f: 52 | th.save(net.state_dict(), f) 53 | print(f'Saved model to {f}.') 54 | if f: 55 | th.save(net.state_dict(), f) 56 | print(f'Saved model to {f}.') 57 | train(net, **train_params, optim_params=optim_params) 58 | optim_params['lr'] /= 5 59 | train(net, **train_params, optim_params=optim_params) 60 | acc = get_acc(net) 61 | print(f'Trained ComplexNet with accuracy {acc}.') 62 | if f: 63 | th.save(net.state_dict(), f) 64 | print(f'Saved model to {f}.') 65 | def train_fft(f=F_FFT_TRAIN, n_h=[256, 256]): 66 | train_params = {} 67 | train_params['n_epochs'] = 5 68 | train_params['log_interval'] = 100 69 | train_params['batch_size'] = 100 70 | 71 | optim_params = {} 72 | optim_params['lr'] = LR_FFT * 3 73 | optim_params['momentum'] = .9 74 | 75 | net = mnist_ONN(FFTUnitary, hidden_units=n_h) 76 | print(net) 77 | train(net, **train_params, optim_params=optim_params) 78 | optim_params['lr'] /= 5 79 | train(net, **train_params, optim_params=optim_params) 80 | acc = get_acc(net) 81 | print(f'Trained FFTNet with accuracy {acc}.') 82 | if f: 83 | th.save(net, f) 84 | print(f'Saved model to {f}.') 85 | def convert_save_grid_net(complex_net=None, f=None, rand_S=True): 86 | if complex_net is None: 87 | complex_net = load_complex() 88 | if f is None: 89 | f = F_GRID_TRAIN if rand_S else F_GRID_ORD_TRAIN 90 | grid_net = complex_net.to_grid_net(rand_S=rand_S).to(DEVICE) 91 | acc = get_acc(grid_net) 92 | print(f'Converted to GridNet with accuracy {acc} with {"shuffled" if rand_S else "ordered"} singular values.') 93 | th.save(grid_net.state_dict(), f) 94 | print(f'Saved GridNet at {f}') 95 | def batch_train_complex(n_train, dir = DIR_COMPLEX_TRAIN): 96 | for _ in range(n_train): 97 | f = os.path.join(dir, f'{time():.0f}') 98 | train_complex(f=f) 99 | def batch_convert(dir = DIR_COMPLEX_TRAIN): 100 | for f in glob(os.path.join(dir, '*')): 101 | net = load_complex(f) 102 | convert_save_grid_net(net, f=f+'_grid') 103 | def load_complex(f=F_COMPLEX_TRAIN): 104 | net = mnist_complex() 105 | net.load_state_dict(th.load(f, map_location=DEVICE)) 106 | acc = get_acc(net) 107 | print(f'ComplexNet loaded from {f} with accuracy {acc}.') 108 | return net.to(DEVICE) 109 | def load_grid(f=None, rand_S=True, report_acc=True): 110 | if f is None: 111 | f = F_GRID_TRAIN if rand_S else F_GRID_ORD_TRAIN 112 | net = mnist_ONN() 113 | net.load_state_dict(th.load(f, map_location=DEVICE)) 114 | if report_acc: 115 | acc = get_acc(net) 116 | print(f'GridNet loaded from {f} with accuracy {acc}.') 117 | else: 118 | print(f'GridNet loaded from {f}.') 119 | return net.to(DEVICE) 120 | def load_fft(f=F_FFT_TRAIN): 121 | net = mnist_ONN(FFTUnitary) 122 | net.load_state_dict(th.load(f, map_location=DEVICE)) 123 | acc = get_acc(net) 124 | print(f'FFTNet loaded from {f} with accuracy {acc}.') 125 | return net.to(DEVICE) 126 | 127 | if __name__ == '__main__': 128 | 129 | net = load_fft() 130 | 131 | for data, target in mnist_loader(train=False, batch_size=100, shuffle=False): 132 | continue 133 | data = data.view(-1, 28**2) 134 | data, target = data.to(DEVICE), target.to(DEVICE) 135 | print(th.max(net(data), dim=1)) 136 | -------------------------------------------------------------------------------- /trained_models/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mike-fang/imprecise_optical_neural_network/2ed452fb4b6fd6105ff45bd6a60483f940246f09/trained_models/.DS_Store -------------------------------------------------------------------------------- /trained_models/GridUnitary_256.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mike-fang/imprecise_optical_neural_network/2ed452fb4b6fd6105ff45bd6a60483f940246f09/trained_models/GridUnitary_256.pth -------------------------------------------------------------------------------- /trained_models/_.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mike-fang/imprecise_optical_neural_network/2ed452fb4b6fd6105ff45bd6a60483f940246f09/trained_models/_.pth -------------------------------------------------------------------------------- /trained_models/cgrd.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mike-fang/imprecise_optical_neural_network/2ed452fb4b6fd6105ff45bd6a60483f940246f09/trained_models/cgrd.pth -------------------------------------------------------------------------------- /trained_models/complex_1_layer.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mike-fang/imprecise_optical_neural_network/2ed452fb4b6fd6105ff45bd6a60483f940246f09/trained_models/complex_1_layer.pth -------------------------------------------------------------------------------- /trained_models/complex_net.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mike-fang/imprecise_optical_neural_network/2ed452fb4b6fd6105ff45bd6a60483f940246f09/trained_models/complex_net.pth -------------------------------------------------------------------------------- /trained_models/complex_net/1550179045: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mike-fang/imprecise_optical_neural_network/2ed452fb4b6fd6105ff45bd6a60483f940246f09/trained_models/complex_net/1550179045 -------------------------------------------------------------------------------- /trained_models/complex_net/1550179474: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mike-fang/imprecise_optical_neural_network/2ed452fb4b6fd6105ff45bd6a60483f940246f09/trained_models/complex_net/1550179474 -------------------------------------------------------------------------------- /trained_models/complex_net/1550179545: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mike-fang/imprecise_optical_neural_network/2ed452fb4b6fd6105ff45bd6a60483f940246f09/trained_models/complex_net/1550179545 -------------------------------------------------------------------------------- /trained_models/complex_net/1550179616: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mike-fang/imprecise_optical_neural_network/2ed452fb4b6fd6105ff45bd6a60483f940246f09/trained_models/complex_net/1550179616 -------------------------------------------------------------------------------- /trained_models/complex_net/1550179687: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mike-fang/imprecise_optical_neural_network/2ed452fb4b6fd6105ff45bd6a60483f940246f09/trained_models/complex_net/1550179687 -------------------------------------------------------------------------------- /trained_models/complex_net/1550179759: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mike-fang/imprecise_optical_neural_network/2ed452fb4b6fd6105ff45bd6a60483f940246f09/trained_models/complex_net/1550179759 -------------------------------------------------------------------------------- /trained_models/complex_net/1550179830: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mike-fang/imprecise_optical_neural_network/2ed452fb4b6fd6105ff45bd6a60483f940246f09/trained_models/complex_net/1550179830 -------------------------------------------------------------------------------- /trained_models/complex_net/1550179902: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mike-fang/imprecise_optical_neural_network/2ed452fb4b6fd6105ff45bd6a60483f940246f09/trained_models/complex_net/1550179902 -------------------------------------------------------------------------------- /trained_models/complex_net/1550179973: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mike-fang/imprecise_optical_neural_network/2ed452fb4b6fd6105ff45bd6a60483f940246f09/trained_models/complex_net/1550179973 -------------------------------------------------------------------------------- /trained_models/complex_net/1550180044: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mike-fang/imprecise_optical_neural_network/2ed452fb4b6fd6105ff45bd6a60483f940246f09/trained_models/complex_net/1550180044 -------------------------------------------------------------------------------- /trained_models/complex_net/1550180115: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mike-fang/imprecise_optical_neural_network/2ed452fb4b6fd6105ff45bd6a60483f940246f09/trained_models/complex_net/1550180115 -------------------------------------------------------------------------------- /trained_models/fft_net.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mike-fang/imprecise_optical_neural_network/2ed452fb4b6fd6105ff45bd6a60483f940246f09/trained_models/fft_net.pth -------------------------------------------------------------------------------- /trained_models/fft_net_16384_.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mike-fang/imprecise_optical_neural_network/2ed452fb4b6fd6105ff45bd6a60483f940246f09/trained_models/fft_net_16384_.pth -------------------------------------------------------------------------------- /trained_models/fft_net_diff_depth/1: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mike-fang/imprecise_optical_neural_network/2ed452fb4b6fd6105ff45bd6a60483f940246f09/trained_models/fft_net_diff_depth/1 -------------------------------------------------------------------------------- /trained_models/fft_net_diff_depth/2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mike-fang/imprecise_optical_neural_network/2ed452fb4b6fd6105ff45bd6a60483f940246f09/trained_models/fft_net_diff_depth/2 -------------------------------------------------------------------------------- /trained_models/fft_net_diff_depth/3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mike-fang/imprecise_optical_neural_network/2ed452fb4b6fd6105ff45bd6a60483f940246f09/trained_models/fft_net_diff_depth/3 -------------------------------------------------------------------------------- /trained_models/fft_net_diff_depth/4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mike-fang/imprecise_optical_neural_network/2ed452fb4b6fd6105ff45bd6a60483f940246f09/trained_models/fft_net_diff_depth/4 -------------------------------------------------------------------------------- /trained_models/fft_net_diff_depth/5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mike-fang/imprecise_optical_neural_network/2ed452fb4b6fd6105ff45bd6a60483f940246f09/trained_models/fft_net_diff_depth/5 -------------------------------------------------------------------------------- /trained_models/fft_net_diff_nh/1024: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mike-fang/imprecise_optical_neural_network/2ed452fb4b6fd6105ff45bd6a60483f940246f09/trained_models/fft_net_diff_nh/1024 -------------------------------------------------------------------------------- /trained_models/fft_net_diff_nh/256: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mike-fang/imprecise_optical_neural_network/2ed452fb4b6fd6105ff45bd6a60483f940246f09/trained_models/fft_net_diff_nh/256 -------------------------------------------------------------------------------- /trained_models/fft_net_diff_nh/4096: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mike-fang/imprecise_optical_neural_network/2ed452fb4b6fd6105ff45bd6a60483f940246f09/trained_models/fft_net_diff_nh/4096 -------------------------------------------------------------------------------- /trained_models/fft_net_nh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mike-fang/imprecise_optical_neural_network/2ed452fb4b6fd6105ff45bd6a60483f940246f09/trained_models/fft_net_nh -------------------------------------------------------------------------------- /trained_models/grid_1_layer.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mike-fang/imprecise_optical_neural_network/2ed452fb4b6fd6105ff45bd6a60483f940246f09/trained_models/grid_1_layer.pth -------------------------------------------------------------------------------- /trained_models/grid_net.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mike-fang/imprecise_optical_neural_network/2ed452fb4b6fd6105ff45bd6a60483f940246f09/trained_models/grid_net.pth -------------------------------------------------------------------------------- /trained_models/grid_net/1550179045_grid: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mike-fang/imprecise_optical_neural_network/2ed452fb4b6fd6105ff45bd6a60483f940246f09/trained_models/grid_net/1550179045_grid -------------------------------------------------------------------------------- /trained_models/grid_net/1550179474_grid: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mike-fang/imprecise_optical_neural_network/2ed452fb4b6fd6105ff45bd6a60483f940246f09/trained_models/grid_net/1550179474_grid -------------------------------------------------------------------------------- /trained_models/grid_net/1550179545_grid: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mike-fang/imprecise_optical_neural_network/2ed452fb4b6fd6105ff45bd6a60483f940246f09/trained_models/grid_net/1550179545_grid -------------------------------------------------------------------------------- /trained_models/grid_net/1550179616_grid: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mike-fang/imprecise_optical_neural_network/2ed452fb4b6fd6105ff45bd6a60483f940246f09/trained_models/grid_net/1550179616_grid -------------------------------------------------------------------------------- /trained_models/grid_net/1550179687_grid: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mike-fang/imprecise_optical_neural_network/2ed452fb4b6fd6105ff45bd6a60483f940246f09/trained_models/grid_net/1550179687_grid -------------------------------------------------------------------------------- /trained_models/grid_net/1550179759_grid: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mike-fang/imprecise_optical_neural_network/2ed452fb4b6fd6105ff45bd6a60483f940246f09/trained_models/grid_net/1550179759_grid -------------------------------------------------------------------------------- /trained_models/grid_net/1550179830_grid: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mike-fang/imprecise_optical_neural_network/2ed452fb4b6fd6105ff45bd6a60483f940246f09/trained_models/grid_net/1550179830_grid -------------------------------------------------------------------------------- /trained_models/grid_net/1550179902_grid: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mike-fang/imprecise_optical_neural_network/2ed452fb4b6fd6105ff45bd6a60483f940246f09/trained_models/grid_net/1550179902_grid -------------------------------------------------------------------------------- /trained_models/grid_net/1550179973_grid: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mike-fang/imprecise_optical_neural_network/2ed452fb4b6fd6105ff45bd6a60483f940246f09/trained_models/grid_net/1550179973_grid -------------------------------------------------------------------------------- /trained_models/grid_net/1550180044_grid: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mike-fang/imprecise_optical_neural_network/2ed452fb4b6fd6105ff45bd6a60483f940246f09/trained_models/grid_net/1550180044_grid -------------------------------------------------------------------------------- /trained_models/grid_net/1550180115_grid: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mike-fang/imprecise_optical_neural_network/2ed452fb4b6fd6105ff45bd6a60483f940246f09/trained_models/grid_net/1550180115_grid -------------------------------------------------------------------------------- /trained_models/grid_net_ordered_SV.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mike-fang/imprecise_optical_neural_network/2ed452fb4b6fd6105ff45bd6a60483f940246f09/trained_models/grid_net_ordered_SV.pth -------------------------------------------------------------------------------- /trained_models/grid_ord_net/1550179045: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mike-fang/imprecise_optical_neural_network/2ed452fb4b6fd6105ff45bd6a60483f940246f09/trained_models/grid_ord_net/1550179045 -------------------------------------------------------------------------------- /trained_models/grid_ord_net/1550179474: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mike-fang/imprecise_optical_neural_network/2ed452fb4b6fd6105ff45bd6a60483f940246f09/trained_models/grid_ord_net/1550179474 -------------------------------------------------------------------------------- /trained_models/grid_ord_net/1550179545: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mike-fang/imprecise_optical_neural_network/2ed452fb4b6fd6105ff45bd6a60483f940246f09/trained_models/grid_ord_net/1550179545 -------------------------------------------------------------------------------- /trained_models/grid_ord_net/1550179616: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mike-fang/imprecise_optical_neural_network/2ed452fb4b6fd6105ff45bd6a60483f940246f09/trained_models/grid_ord_net/1550179616 -------------------------------------------------------------------------------- /trained_models/grid_ord_net/1550179687: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mike-fang/imprecise_optical_neural_network/2ed452fb4b6fd6105ff45bd6a60483f940246f09/trained_models/grid_ord_net/1550179687 -------------------------------------------------------------------------------- /trained_models/grid_ord_net/1550179759: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mike-fang/imprecise_optical_neural_network/2ed452fb4b6fd6105ff45bd6a60483f940246f09/trained_models/grid_ord_net/1550179759 -------------------------------------------------------------------------------- /trained_models/grid_ord_net/1550179830: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mike-fang/imprecise_optical_neural_network/2ed452fb4b6fd6105ff45bd6a60483f940246f09/trained_models/grid_ord_net/1550179830 -------------------------------------------------------------------------------- /trained_models/grid_ord_net/1550179902: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mike-fang/imprecise_optical_neural_network/2ed452fb4b6fd6105ff45bd6a60483f940246f09/trained_models/grid_ord_net/1550179902 -------------------------------------------------------------------------------- /trained_models/grid_ord_net/1550179973: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mike-fang/imprecise_optical_neural_network/2ed452fb4b6fd6105ff45bd6a60483f940246f09/trained_models/grid_ord_net/1550179973 -------------------------------------------------------------------------------- /trained_models/grid_ord_net/1550180044: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mike-fang/imprecise_optical_neural_network/2ed452fb4b6fd6105ff45bd6a60483f940246f09/trained_models/grid_ord_net/1550180044 -------------------------------------------------------------------------------- /trained_models/grid_ord_net/1550180115: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mike-fang/imprecise_optical_neural_network/2ed452fb4b6fd6105ff45bd6a60483f940246f09/trained_models/grid_ord_net/1550180115 -------------------------------------------------------------------------------- /trained_models/stacked_fft_1.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mike-fang/imprecise_optical_neural_network/2ed452fb4b6fd6105ff45bd6a60483f940246f09/trained_models/stacked_fft_1.pth -------------------------------------------------------------------------------- /trained_models/stacked_fft_32.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mike-fang/imprecise_optical_neural_network/2ed452fb4b6fd6105ff45bd6a60483f940246f09/trained_models/stacked_fft_32.pth -------------------------------------------------------------------------------- /trained_models/truncated_grid.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mike-fang/imprecise_optical_neural_network/2ed452fb4b6fd6105ff45bd6a60483f940246f09/trained_models/truncated_grid.pth -------------------------------------------------------------------------------- /unitary_decomp.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numpy.linalg import svd 3 | from scipy.stats import unitary_group 4 | import math 5 | from math import pi 6 | from complex_torch_var import * 7 | import logging 8 | from time import time 9 | #from optical_nn_2 import DiagLayer 10 | 11 | log = logging.getLogger(__name__) 12 | logging.basicConfig(level=logging.ERROR) 13 | 14 | def random_complex_mat(N, M=None): 15 | if M == None: 16 | M = N 17 | U = np.random.randn(N, M) 18 | V = np.random.randn(N, M) 19 | return np.matrix(U + 1j * V) 20 | def rand_unitary(N): 21 | U = unitary_group.rvs(N) 22 | return np.matrix(U) 23 | def rand_vec(D=2, complex=True): 24 | X = np.random.randn(D) 25 | Y = np.random.randn(D) 26 | if complex: 27 | return X + 1j * Y 28 | else: 29 | return X 30 | def rand_theta_phi(): 31 | theta, phi = np.random.uniform(0, np.pi, size=2) 32 | return theta, 2 * phi 33 | 34 | def U_BS(alpha=np.pi/2): 35 | c = math.cos(alpha/2) 36 | s = math.sin(alpha/2) 37 | return np.matrix( 38 | [[c, 1j * s], 39 | [1j * s, c]] 40 | ) 41 | def U_PS(theta): 42 | phase = np.exp(1j * np.array([theta, 0])) 43 | return np.matrix(np.diag(phase)) 44 | def U_MZ(theta, phi, a1=np.pi/2, a2=np.pi/2): 45 | if (a1 == np.pi/2) and (a2 == np.pi/2): 46 | M = np.matrix( 47 | [ 48 | [np.exp(1j * phi) * np.sin(theta/2), np.cos(theta/2)], 49 | [np.exp(1j * phi) * np.cos(theta/2), -np.sin(theta/2)] 50 | ] 51 | ) 52 | return 1j * np.exp(1j * theta/2) * M 53 | 54 | # If note 50-50 multiply through the components 55 | return U_BS(a1) @ U_PS(theta) @ U_BS(a2) @ U_PS(phi) 56 | def get_null_angle(X, flip=False): 57 | """ 58 | Given a complex 2-vector X, find angles theta, phi such that 59 | U_MZ(theta, phi) @ X = [1, 0] (or [0, 1] when flip == True) 60 | """ 61 | 62 | if isinstance(X, np.matrix): 63 | X = np.array(X).flatten() 64 | X_mag = np.absolute(X) 65 | phis = np.angle(X) 66 | theta = np.arctan(X_mag[0] / X_mag[1]) * 2 67 | phi = (phis[1] - phis[0]) 68 | 69 | if flip == True: 70 | theta = (np.pi - theta) 71 | phi += np.pi 72 | 73 | 74 | phi %= 2 * math.pi 75 | return theta, phi 76 | def get_sub_T(N, i, j, T): 77 | M = np.eye(N, dtype='complex128') 78 | M[i, i] = T[0, 0] 79 | M[i, j] = T[0, 1] 80 | M[j, i] = T[1, 0] 81 | M[j, j] = T[1, 1] 82 | 83 | return np.matrix(M) 84 | def get_ij(N, s, k, backward=False): 85 | """ 86 | Returns indices (i, j) of U that the kth step of stage s nulls 87 | """ 88 | if backward: 89 | i = N - (s + 1 - k) 90 | j = k 91 | else: 92 | i = N - 1 - k 93 | j = s - k 94 | return i , j 95 | def get_nl(D, s, k): 96 | """ 97 | Returns the physical locations of the MZI associated with the angles obtained in stage s, step k 98 | n: The MZI swaps channels (n, n+1) 99 | l: The MZI is in layer l 100 | """ 101 | 102 | n_max = D - 2 103 | l_max = D - 1 104 | 105 | n = s - k 106 | l = k 107 | if s % 2 == 1: 108 | n = n_max - n 109 | l = l_max - l 110 | return n, l 111 | def null_element(U, i, j, backward=False): 112 | log.info(f'Nulling element U[{i}, {j}], {"backward" if backward else "forward"} stage') 113 | N, _ = U.shape 114 | if backward: 115 | X = U[[i-1, i], j] 116 | theta, phi = get_null_angle(X, flip=False) 117 | T = U_MZ(theta, phi) 118 | U[[i-1, i], :] = T @ U[[i-1, i], :] 119 | else: 120 | X = U[i, [j, j+1]] 121 | theta, phi = get_null_angle(X.H, flip=True) 122 | T = U_MZ(theta, phi) 123 | U[:, [j, j+1]] = U[:, [j, j+1]] @ T.H 124 | epsilon = 1e-8 125 | 126 | if np.abs(U[i, j]) > epsilon: 127 | log.warning(f'The element U[{i}, {j}] was not nulled within tolerance of {epsilon}, its abs value is {np.abs(U[i, j]):.2e}') 128 | else: 129 | log.info('Element sucessfully nulled') 130 | return (theta, phi) 131 | """ 132 | Decomposes an unitary matrix to a series of SU(2) implimented by MZIs as described 133 | by Clements et. al (2017) 134 | 135 | U: the unitary matrix to be decopsed 136 | 137 | returns 138 | coords : coordinates of the MZIs 139 | angles : a (N (N - 1) / 2) x 2 array of thetas, phis which parametrize the MZIs 140 | D : a N vector which gives the residual phase shifts 141 | """ 142 | # Makes a copy of U to return to as all operations are in place. 143 | if reset_U: 144 | U0 = U.copy() 145 | N, _ = U.shape 146 | n_stages = N - 1 147 | 148 | angles_f = [] 149 | angles_b = [] 150 | coords_f = [] 151 | coords_b = [] 152 | MZI_loc_f = [] 153 | MZI_loc_b = [] 154 | 155 | # Build the coordinates of operation 156 | for s in range(n_stages): 157 | # Odd iterations are backward stages 158 | backward_stage = (s % 2 == 1) 159 | for k in range(s + 1): 160 | n, l = get_nl(N, s, k) 161 | i, j = get_ij(N, s, k, backward=backward_stage) 162 | theta_phi = null_element(U, i, j, backward=backward_stage) 163 | if backward_stage: 164 | coords_b.append((i-1, i)) 165 | angles_b.append(theta_phi) 166 | MZI_loc_b.append((l, n)) 167 | else: 168 | coords_f.append((j, j+1)) 169 | angles_f.append(theta_phi) 170 | MZI_loc_f.append((l, n)) 171 | 172 | # Reverse order of backward operations 173 | angles_b.reverse() 174 | coords_b.reverse() 175 | MZI_loc_b.reverse() 176 | 177 | # Put forward and backwards together 178 | coords = coords_f + coords_b 179 | MZI_loc = MZI_loc_f + MZI_loc_b 180 | 181 | def swap_T_D(theta_phi, D): 182 | # Find T_ and D_ such that T.H @ D = D_ @ T_ 183 | theta, phi = theta_phi 184 | 185 | # Get phases of D 186 | psis = np.array(np.angle(D.diagonal()))[0] 187 | psi0 = psis[1] 188 | psi = psis[0] - psi0 189 | 190 | # Get new angles 191 | theta_ = theta 192 | phi_ = psi 193 | psi_ = -phi 194 | psi0_ = psi0 - theta + np.pi 195 | 196 | # Make new D 197 | D_ = np.exp(1j * psi0_) * U_PS(psi_) 198 | T_ = U_MZ(theta_, phi_) 199 | 200 | return (theta_, phi_), D_ 201 | 202 | # Put angles in layers 203 | n_back = len(angles_b) 204 | for n in range(n_back): 205 | theta, phi = angles_b[n] 206 | i, j = coords_b[n] 207 | D = U[[i, j]][:, [i, j]] 208 | (theta_, phi_), D_ = swap_T_D((theta, phi), D) 209 | angles_b[n] = (theta_, phi_) 210 | #T_ = U_MZ(theta_, phi_) 211 | #sub_T_ = get_sub_T(N, i, j, T_) 212 | U[i, i] = D_[0, 0] 213 | U[j, j] = D_[1, 1] 214 | 215 | 216 | return angles_f 217 | # Put angles together and set to be in (0, 2pi) 218 | angles = angles_f + angles_b 219 | D = np.angle(np.diag(U)) % (2 * np.pi) 220 | 221 | # Brings U back to original input 222 | if reset_U: 223 | U = U0 224 | 225 | layered_angles = [[None,] * ((N)//2) for _ in range(N)] 226 | 227 | for angle, (l, n) in zip(angles, MZI_loc): 228 | if l % 2 == 0: 229 | i = n//2 230 | else: 231 | i = (n-1)//2 232 | layered_angles[l][i] = angle 233 | 234 | return layered_angles 235 | return coords, np.array(angles), D 236 | def unitary_decomp(U, in_place=False): 237 | if U.dtype != 'complex128': 238 | U = U.astype('complex128') 239 | """ 240 | Decomposes an unitary matrix to a series of SU(2) implimented by MZIs as described 241 | by Clements et. al (2017) 242 | 243 | U: the unitary matrix to be decopsed 244 | 245 | returns 246 | theta/phi_A/B : angles of the phase shifters at each layer 247 | psi : residual phase shift 248 | """ 249 | if not in_place: 250 | U = U.copy() 251 | 252 | N, _ = U.shape 253 | n_stages = N - 1 254 | 255 | angles_f = [] 256 | angles_b = [] 257 | coords_f = [] 258 | coords_b = [] 259 | MZI_loc_f = [] 260 | MZI_loc_b = [] 261 | 262 | # Build the coordinates of operation 263 | for s in range(n_stages): 264 | # Odd iterations are backward stages 265 | backward_stage = (s % 2 == 1) 266 | for k in range(s + 1): 267 | n, l = get_nl(N, s, k) 268 | i, j = get_ij(N, s, k, backward=backward_stage) 269 | theta_phi = null_element(U, i, j, backward=backward_stage) 270 | if backward_stage: 271 | coords_b.append((i-1, i)) 272 | angles_b.append(theta_phi) 273 | MZI_loc_b.append((l, n)) 274 | else: 275 | coords_f.append((j, j+1)) 276 | angles_f.append(theta_phi) 277 | MZI_loc_f.append((l, n)) 278 | 279 | # Reverse order of backward operations 280 | angles_b.reverse() 281 | coords_b.reverse() 282 | MZI_loc_b.reverse() 283 | 284 | 285 | # Put forward and backwards together 286 | coords = coords_f + coords_b 287 | MZI_loc = MZI_loc_f + MZI_loc_b 288 | 289 | def swap_T_D(theta_phi, D): 290 | # Find T_ and D_ such that T.H @ D = D_ @ T_ 291 | theta, phi = theta_phi 292 | 293 | # Get residual phases 294 | psis = np.array(np.angle(D.diagonal()))[0] 295 | psi0 = psis[1] 296 | psi = psis[0] - psi0 297 | 298 | # Get new angles 299 | theta_ = theta 300 | phi_ = psi 301 | psi_ = -phi 302 | psi0_ = psi0 - theta + np.pi 303 | 304 | # Make new D 305 | D_ = np.exp(1j * psi0_) * U_PS(psi_) 306 | T_ = U_MZ(theta_, phi_) 307 | 308 | return (theta_, phi_), D_ 309 | 310 | # Put angles in layers 311 | n_back = len(angles_b) 312 | for n in range(n_back): 313 | theta, phi = angles_b[n] 314 | i, j = coords_b[n] 315 | D = U[[i, j]][:, [i, j]] 316 | (theta_, phi_), D_ = swap_T_D((theta, phi), D) 317 | angles_b[n] = (theta_, phi_) 318 | #T_ = U_MZ(theta_, phi_) 319 | #sub_T_ = get_sub_T(N, i, j, T_) 320 | U[i, i] = D_[0, 0] 321 | U[j, j] = D_[1, 1] 322 | 323 | 324 | # Put angles together and set to be in (0, 2pi) 325 | angles = angles_f + angles_b 326 | psi = np.angle(np.diag(U)) % (2 * np.pi) 327 | 328 | # Initialize theta/phi_A/B 329 | n_layer_B = N//2 330 | n_layer_A = N - n_layer_B 331 | n_MZ_B = (N-1)//2 332 | n_MZ_A = N//2 333 | theta_A = th.zeros(n_layer_A, n_MZ_A) * 2 * pi 334 | phi_A = th.zeros(n_layer_A, n_MZ_A) * 2 * pi 335 | theta_B = th.zeros(n_layer_B, n_MZ_B) * 2 * pi 336 | phi_B = th.zeros(n_layer_B, n_MZ_B) * 2 * pi 337 | theta_phis = [theta_A, phi_A, theta_B, phi_B] 338 | for n in range(4): 339 | theta_phis[n] = theta_phis[n].float() 340 | 341 | for angle, (l, n) in zip(angles, MZI_loc): 342 | if l % 2 == 0: 343 | i = n//2 344 | theta_A[l//2, i] = angle[0] 345 | phi_A[l//2, i] = angle[1] 346 | else: 347 | i = (n-1)//2 348 | theta_B[(l-1)//2, i] = angle[0] 349 | phi_B[(l-1)//2, i] = angle[1] 350 | 351 | return theta_A, phi_A, theta_B, phi_B, th.tensor(psi).float() 352 | def diag_decomp(S): 353 | """ 354 | Given the diagonal of a non-negative diagonal matrix S (as a vector), find the angles (theta, phi) that impliments attenuation. Note that S will be normalized first so that the largest value will be 1 and all others less than 1. 355 | """ 356 | # Normalize S 357 | scale = S.max() 358 | S_ = S/scale 359 | thetas = 2 * np.arcsin(S_) 360 | phis = -thetas/2 - np.pi/2 361 | return np.vstack((thetas, phis)).T, float(scale) 362 | 363 | if __name__ == '__main__': 364 | D = 4 365 | U_im = np.eye(D)[::-1] 366 | U = np.matrix(-1j * U_im) 367 | print(type(U)) 368 | for x in unitary_decomp(U): 369 | print(x/np.pi) 370 | --------------------------------------------------------------------------------