├── Dataset ├── readme └── InverseDataset.xlsx ├── model.py ├── README.md ├── training_manager.py ├── preprocess.py └── FE-PINN_Example.ipynb /Dataset/readme: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /Dataset/InverseDataset.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahyar-jahaninasab/Feature-Enforcing-PINN/HEAD/Dataset/InverseDataset.xlsx -------------------------------------------------------------------------------- /model.py: -------------------------------------------------------------------------------- 1 | import math 2 | import copy 3 | import time 4 | import numpy as np 5 | import pandas as pd 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | from torch.autograd import grad 10 | import torch.optim as optim 11 | from tqdm import tqdm 12 | from functools import partial 13 | from matplotlib import cm 14 | from scipy.stats import qmc 15 | import matplotlib.pyplot as plt 16 | from pyDOE import lhs 17 | from mpl_toolkits.axes_grid1 import make_axes_locatable 18 | from typing import List, Callable, Union 19 | 20 | 21 | 22 | DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 23 | 24 | class PINN_Net(nn.Module): 25 | def __init__(self, PDE: str, hidden_sizes: List[int], ub: List[Union[int, float]], lb: List[Union[int, float]], activation: Callable): 26 | super(PINN_Net, self).__init__() 27 | 28 | if PDE == 'Navier_Stokes': 29 | self.input_size = 2 30 | self.output_size = 6 31 | elif PDE == 'Heat_Conduction': 32 | self.input_size = 2 33 | self.output_size = 1 34 | elif PDE == 'Burger': 35 | self.input_size = 2 36 | self.output_size = 1 37 | else: 38 | raise ValueError("Unsupported PDE type") 39 | 40 | self.hidden_sizes = hidden_sizes 41 | self.act = activation 42 | self.ub = torch.tensor(ub, dtype=torch.float32).to(DEVICE) 43 | self.lb = torch.tensor(lb, dtype=torch.float32).to(DEVICE) 44 | 45 | # Input layer 46 | self.fc1 = nn.Linear(self.input_size, self.hidden_sizes[0]) 47 | nn.init.xavier_uniform_(self.fc1.weight) 48 | # Hidden layers 49 | self.hidden_layers = nn.ModuleList() 50 | for k in range(len(hidden_sizes)-1): 51 | layer = nn.Linear(hidden_sizes[k], hidden_sizes[k+1]) 52 | nn.init.xavier_uniform_(layer.weight) 53 | self.hidden_layers.append(layer) 54 | # Output layer 55 | self.fc2 = nn.Linear(self.hidden_sizes[-1], self.output_size) 56 | nn.init.xavier_uniform_(self.fc2.weight) 57 | 58 | def forward(self, x: torch.Tensor) -> torch.Tensor: 59 | # Input layer 60 | x = (x - self.lb) / (self.ub - self.lb) 61 | out = x 62 | out = self.act(self.fc1(out)) 63 | # Hidden layers 64 | for layer in self.hidden_layers: 65 | out = self.act(layer(out)) 66 | # Output layer 67 | out = self.fc2(out) 68 | return out 69 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Citation: Mahyar Jahani-nasab, Bijarchi Mohammad Ali, "Enhancing convergence speed with feature enforcing physics-informed neural networks using boundary conditions as prior knowledge." Sci Rep 14, 23836 (2024). 
2 | 3 | 4 | # Enhancing Convergence Speed with Feature-Enforcing Physics-Informed Neural Networks: Utilizing Boundary Conditions as Prior Knowledge for Faster Convergence 5 | 6 | This repository contains the implementation of a novel accelerated training method for Vanilla Physics-Informed Neural Networks (PINN). The method addresses three factors that imbalance the loss function: 7 | 8 | 1. Initial weight state of a neural network 9 | 2. Domain-to-boundary points ratio 10 | 3. Loss weighting factor 11 | ## Abstract: 12 | This study introduces an accelerated training method for Vanilla Physics-Informed Neural Networks (PINN) addressing three factors that imbalance the loss function: initial weight state of a neural network, domain-to-boundary points ratio, and loss weighting factor. We propose a novel two-stage training method. During the initial stage, we create a unique loss function using a subset of boundary conditions and partial differential equation terms. Furthermore, we introduce preprocessing procedures that aim to decrease the variance during initialization and choose domain points according to the initial weight state of various neural networks. The second phase resembles Vanilla-PINN training, but a portion of the random weights is substituted with weights from the first phase. This implies that the neural network's structure is designed to prioritize the boundary conditions, subsequently affecting the overall convergence. Three benchmarks are utilized: two-dimensional flow over a cylinder, an inverse problem of inlet velocity determination, and the Burger equation. It is found that incorporating weights generated in the first training phase into the structure of a neural network neutralizes the effects of imbalance factors. For instance, in the first benchmark, as a result of our process, the second phase of training is balanced across a wide range of ratios and is not affected by the initial state of weights, while the Vanilla-PINN failed to converge in most cases. Lastly, the initial training process not only eliminates the need for hyperparameter tuning to balance the loss function, but it also outperforms the Vanilla-PINN in terms of speed. 13 | 14 | ## Two-Stage Training Method 15 | 16 | We propose a two-stage training method: 17 | 18 | 1. **Initial Stage**: A unique loss function is created using a subset of boundary conditions and partial differential equation terms. We introduce preprocessing procedures that aim to decrease the variance during initialization and choose domain points according to the initial weight state of various neural networks. 19 | 20 | 2. **Second Phase**: This phase resembles Vanilla-PINN training, but a portion of the random weights is substituted with weights from the first phase. This implies that the neural network's structure is designed to prioritize the boundary conditions, subsequently affecting the overall convergence. 21 | 22 | ## Benchmarks 23 | 24 | Three benchmarks are utilized: 25 | 26 | 1. Two-dimensional flow over a cylinder 27 | 2. An inverse problem of inlet velocity determination 28 | 3. The Burger equation 29 | 30 | Incorporating weights generated in the first training phase into the structure of a neural network neutralizes the effects of imbalance factors. For instance, in the first benchmark, as a result of our process, the second phase of training is balanced across a wide range of ratios and is not affected by the initial state of weights, while the Vanilla-PINN failed to converge in most cases.
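## Example Usage

The snippet below is a condensed sketch of the two-stage workflow for the cylinder benchmark, distilled from `FE-PINN_Example.ipynb`. The layer widths, point counts, learning rate, and epoch budget are the example's illustrative values rather than requirements, and the final L-BFGS step is only indicated as a comment because its boundary sets (outlet, cylinder surface) and stopping criteria are benchmark-specific.

```python
# Condensed two-stage sketch for the cylinder benchmark, following FE-PINN_Example.ipynb.
# All numeric values below are the example notebook's settings, not requirements.
import numpy as np
import torch
import torch.nn as nn

from model import PINN_Net, DEVICE
from preprocess import batch_maker, pipe_line, load_smart_weights
from training_manager import PINNManager

ub, lb = np.array([1.0, 0.4]), np.array([0.0, 0.0])
data_loader = batch_maker(cylinder=[0.2, 0.2, 0.05], domain_ub=[1.0, 0.4],
                          domain_lb=[0.0, 0.0], A=1, Centered=True)

# Domain batches plus the subset of boundary conditions (walls and inlet only)
# that enters the Primary Loss Function of the first stage.
total_domain = data_loader.design_batches(10, 1849, 1849)
total_domain.append(data_loader.around_obstacle(1849))
wall_xy, wall_uv, _, _, inlet_xy, inlet_uv = data_loader.boundary(1000, 1000)
xy_bd = torch.tensor(np.concatenate([wall_xy, inlet_xy]), dtype=torch.float32).to(DEVICE)
uv_bd = torch.tensor(np.concatenate([wall_uv, inlet_uv]), dtype=torch.float32).to(DEVICE)
tensor_total_domain = [torch.tensor(b, dtype=torch.float32).to(DEVICE) for b in total_domain]

# Stage 1: average the initial states of n random networks, pool the domain points each
# of them flags as highest-residual, then train on the primary loss (subset of boundary
# conditions + PDE terms) with Adam.
init_state, xy_col = pipe_line([40, 40, 40, 40], tensor_total_domain, ub, lb,
                               n=10, PDE='Navier_Stokes')
model = PINN_Net('Navier_Stokes', [40, 40, 40, 40], ub, lb, nn.Tanh()).to(DEVICE)
model.load_state_dict(init_state)
trainer = PINNManager(model, 3e-4, 'Navier_Stokes')
_, _, warmed_model = trainer.adam_optimizer(10000, xy_col, xy_bd, uv_bd)

# Stage 2: build a deeper network and substitute part of its random weights with the
# stage-1 ("smart") weights before continuing with the usual Vanilla-PINN training.
full_model = PINN_Net('Navier_Stokes', [40] * 10, ub, lb, nn.Tanh()).to(DEVICE)
full_model.load_state_dict(load_smart_weights(full_model, warmed_model))
# ...then train full_model with PINNManager(...).lbfgs_optimizer(...) on the complete
# set of boundary conditions (including the cylinder surface and the outlet pressure).
```

The same flow carries over to the heat-conduction and Burger benchmarks by switching the `PDE` argument and using the corresponding boundary/collocation generators in `preprocess.py`.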
31 | 32 | ## Contact 33 | 34 | If you have any questions or need further clarification, feel free to reach out to me. You can email me at: 35 | mahyarjahaninasab [at] gmail [dot] com 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | -------------------------------------------------------------------------------- /training_manager.py: -------------------------------------------------------------------------------- 1 | from preprocess import * 2 | from model import * 3 | 4 | class PINNManager(): 5 | def __init__(self,*args): 6 | super(PINNManager, self).__init__() 7 | 8 | self.pinn = args[0] 9 | self.lr= args[1] if args[1] is not None else 0.001 10 | self.PDE = args[2] 11 | self.optim = optim.Adam(self.pinn.parameters(),lr=self.lr) 12 | 13 | def predict_navier(self,x): 14 | 15 | out = self.pinn.forward(x) 16 | u = out[:, 0:1] 17 | v = out[:, 1:2] 18 | p = out[:, 2:3] 19 | sig_xx = out[:, 3:4] 20 | sig_xy = out[:, 4:5] 21 | sig_yy = out[:, 5:6] 22 | return u, v, p, sig_xx, sig_xy, sig_yy 23 | 24 | def predict_conduction(self,x): 25 | T = self.pinn.forward(x) 26 | return T 27 | 28 | def predict_burger(self,x): 29 | U = self.pinn.forward(x) 30 | return U 31 | 32 | def pde_loss_navier(self,x_,rho=1,mu=0.02): 33 | x = x_.clone() 34 | x.requires_grad = True 35 | u, v, p, sig_xx, sig_xy, sig_yy = self.predict_navier(x) 36 | rho = 1 37 | mu = 0.02 38 | u_out = grad(u.sum(), x, create_graph=True)[0] 39 | v_out = grad(v.sum(), x, create_graph=True)[0] 40 | sig_xx_out = grad(sig_xx.sum(), x, create_graph=True)[0] 41 | sig_xy_out = grad(sig_xy.sum(), x, create_graph=True)[0] 42 | sig_yy_out = grad(sig_yy.sum(), x, create_graph=True)[0] 43 | u_x = u_out[:, 0:1] 44 | u_y = u_out[:, 1:2] 45 | v_x = v_out[:, 0:1] 46 | v_y = v_out[:, 1:2] 47 | sig_xx_x = sig_xx_out[:, 0:1] 48 | sig_xy_x = sig_xy_out[:, 0:1] 49 | sig_xy_y = sig_xy_out[:, 1:2] 50 | sig_yy_y = sig_yy_out[:, 1:2] 51 | # continuity equation 52 | f0 = u_x + v_y 53 | # navier-stokes equation 54 | f1 = rho * (u * u_x + v * u_y) - sig_xx_x - sig_xy_y 55 | f2 = rho * (u * v_x + v * v_y) - sig_xy_x - sig_yy_y 56 | # cauchy stress tensor 57 | f3 = -p + 2 * mu * u_x - sig_xx 58 | f4 = -p + 2 * mu * v_y - sig_yy 59 | f5 = mu * (u_y + v_x) - sig_xy 60 | mse_f0 = torch.mean(torch.square(f0)) 61 | mse_f1 = torch.mean(torch.square(f1)) 62 | mse_f2 = torch.mean(torch.square(f2)) 63 | mse_f3 = torch.mean(torch.square(f3)) 64 | mse_f4 = torch.mean(torch.square(f4)) 65 | mse_f5 = torch.mean(torch.square(f5)) 66 | mse_pde = mse_f0 + mse_f1 + mse_f2 + mse_f3 + mse_f4 + mse_f5 67 | return mse_pde 68 | 69 | def point_selection_navier(self,x_,percent=0.1,rho=1,mu=0.02): 70 | x = x_.clone() 71 | x.requires_grad = True 72 | u, v, p, sig_xx, sig_xy, sig_yy = self.predict_navier(x) 73 | rho = 1 74 | mu = 0.02 75 | u_out = grad(u.sum(), x, create_graph=True)[0] 76 | v_out = grad(v.sum(), x, create_graph=True)[0] 77 | sig_xx_out = grad(sig_xx.sum(), x, create_graph=True)[0] 78 | sig_xy_out = grad(sig_xy.sum(), x, create_graph=True)[0] 79 | sig_yy_out = grad(sig_yy.sum(), x, create_graph=True)[0] 80 | u_x = u_out[:, 0:1] 81 | u_y = u_out[:, 1:2] 82 | v_x = v_out[:, 0:1] 83 | v_y = v_out[:, 1:2] 84 | sig_xx_x = sig_xx_out[:, 0:1] 85 | sig_xy_x = sig_xy_out[:, 0:1] 86 | sig_xy_y = sig_xy_out[:, 1:2] 87 | sig_yy_y = sig_yy_out[:, 1:2] 88 | # continuity equation 89 | f0 = u_x + v_y 90 | # navier-stokes equation 91 | f1 = rho * (u * u_x + v * u_y) - sig_xx_x - sig_xy_y 92 | f2 = rho * (u * v_x + v * v_y) - sig_xy_x - sig_yy_y 93 | # cauchy stress tensor 94 | f3 = -p + 2 * mu * u_x - 
sig_xx 95 | f4 = -p + 2 * mu * v_y - sig_yy 96 | f5 = mu * (u_y + v_x) - sig_xy 97 | pde_score = torch.square(f0) + torch.square(f1) + torch.square(f2) + torch.square(f3) + torch.square(f4) + torch.square(f5) 98 | #sort dastaset base on pde_score in asscending way 99 | dataset = torch.concat([x_,pde_score], axis=1) 100 | last_column = dataset[:, -1] 101 | sorted_indices = torch.argsort(last_column) 102 | split_indices = torch.split(sorted_indices, torch.unique(last_column, sorted=False, return_counts=True)[1].tolist()) 103 | 104 | sorted_data = torch.cat([torch.index_select(dataset, 0, indices) for indices in split_indices]) 105 | total_points = int(len(sorted_data) - percent*len(sorted_data)) 106 | chosen_points = sorted_data[total_points:].clone() 107 | return chosen_points[:,:2] 108 | 109 | 110 | def pde_loss_conduction(self,x_): 111 | x = x_.clone() 112 | x.requires_grad = True 113 | T = self.predict_conduction(x) 114 | T_out = grad(T.sum(), x,create_graph=True)[0] 115 | T_x = T_out[:, 0:1] 116 | T_y = T_out[:, 1:2] 117 | T_x_out = grad(T_x.sum(), x, create_graph=True)[0] 118 | T_xx = T_x_out[:, 0:1] 119 | del T_x_out 120 | T_y_out = grad(T_y.sum(), x, create_graph=True)[0] 121 | T_yy = T_y_out[:, 1:2] 122 | del T_y_out 123 | f0 = T_xx + T_yy 124 | mse_f0 = torch.mean(torch.square(f0)) 125 | mse_pde = mse_f0 126 | return mse_pde 127 | 128 | def point_selection_conduction(self,x_,percent=0.1): 129 | x = x_.clone() 130 | x.requires_grad = True 131 | T = self.predict_conduction(x) 132 | T_out = grad(T.sum(), x,create_graph=True)[0] 133 | T_x = T_out[:, 0:1] 134 | T_y = T_out[:, 1:2] 135 | T_x_out = grad(T_x.sum(), x, create_graph=True)[0] 136 | T_xx = T_x_out[:, 0:1] 137 | del T_x_out 138 | T_y_out = grad(T_y.sum(), x, create_graph=True)[0] 139 | T_yy = T_y_out[:, 1:2] 140 | del T_y_out 141 | f0 = T_xx + T_yy 142 | pde_score = torch.square(f0) 143 | #sort dastaset base on pde_score in asscending way 144 | dataset = torch.concat([x_,pde_score], axis=1) 145 | last_column = dataset[:, -1] 146 | sorted_indices = torch.argsort(last_column) 147 | split_indices = torch.split(sorted_indices, torch.unique(last_column, sorted=False, return_counts=True)[1].tolist()) 148 | sorted_data = torch.cat([torch.index_select(dataset, 0, indices) for indices in split_indices]) 149 | total_points = int(len(sorted_data) - percent*len(sorted_data)) 150 | chosen_points = sorted_data[total_points:].clone() 151 | return chosen_points[:,:2] 152 | 153 | def pde_loss_burger(self,x1,viscosity=0.01): 154 | x = x1.clone() 155 | x.requires_grad = True 156 | u = self.predict_burger(x) 157 | u_out = grad(u.sum(), x,create_graph=True)[0] 158 | u_x = u_out[:,0:1] 159 | u_t = u_out[:,1:2] 160 | u_x_out = grad(u_x.sum(), x, create_graph=True)[0] 161 | u_xx = u_x_out[:,0:1] 162 | del u_x_out 163 | f0 = u_t + u*u_x - viscosity*u_xx 164 | mse_f0 = torch.mean(torch.square(f0)) 165 | return mse_f0 166 | def point_selection_burger(self,x_,viscosity=0.01,percent=0.1): 167 | x = x_.clone() 168 | x.requires_grad = True 169 | u = self.predict_burger(x) 170 | u_out = grad(u.sum(), x,create_graph=True)[0] 171 | u_x = u_out[:,0:1] 172 | u_t = u_out[:,1:2] 173 | u_x_out = grad(u_x.sum(), x, create_graph=True)[0] 174 | u_xx = u_x_out[:,0:1] 175 | del u_x_out 176 | f0 = u_t + u*u_x - viscosity*u_xx 177 | pde_score = torch.square(f0) 178 | dataset = torch.concat([x_,pde_score], axis=1) 179 | last_column = dataset[:, -1] 180 | sorted_indices = torch.argsort(last_column) 181 | split_indices = torch.split(sorted_indices, torch.unique(last_column, 
sorted=False, return_counts=True)[1].tolist()) 182 | sorted_data = torch.cat([torch.index_select(dataset, 0, indices) for indices in split_indices]) 183 | total_points = int(len(sorted_data) - percent*len(sorted_data)) 184 | chosen_points = sorted_data[total_points:].clone() 185 | return chosen_points[:,:2] 186 | 187 | def bc_loss_navier(self, x_bd, value_bnd): 188 | u, v = self.predict_navier(x_bd)[0:2] 189 | mse_bc = torch.mean(torch.square(u - value_bnd[:, 0:1])) + torch.mean(torch.square(v - value_bnd[:, 1:2])) 190 | return mse_bc 191 | 192 | def outlet_loss_navier(self, x): 193 | p,_ = self.predict_navier(x)[2:4] 194 | mse_outlet = torch.mean(torch.square(p)) 195 | return mse_outlet 196 | 197 | def bc_loss_prediction(self, x_bd, value_bnd): 198 | T = self.predict_burger(x_bd) 199 | mse_bc = torch.mean(torch.square(T - value_bnd)) 200 | return mse_bc 201 | 202 | 203 | 204 | def adam_optimizer(self,EPOCHS,xy_col,xy_bd,uv_bd,threshold = 1e-20,landa=1): 205 | ''' 206 | Training on the Primary Loss Function for the first benchmark introduced in the paper 207 | ''' 208 | losse_bc= [] 209 | losses_pde = [] 210 | if self.PDE == 'Navier_Stokes': 211 | for epoch in tqdm(range(EPOCHS)): 212 | self.optim.zero_grad() 213 | mse_bc = self.bc_loss_navier(xy_bd,uv_bd) 214 | mse_pde = self.pde_loss_navier(xy_col) 215 | loss = mse_bc + mse_pde 216 | loss.backward() 217 | losse_bc.append(mse_bc.detach().cpu().item()) 218 | losses_pde.append(mse_pde.detach().cpu().item()) 219 | self.optim.step() 220 | if (epoch+1) % 1000 == 0: 221 | print('Epoch: {}, Loss: {}, bc: {}, PDE: {}'.format(epoch, loss.item(),mse_bc.item(),mse_pde.item())) 222 | elif self.PDE == 'Heat_Conduction': 223 | for epoch in tqdm(range(EPOCHS)): 224 | self.optim.zero_grad() 225 | mse_bc = self.bc_loss_prediction(xy_bd,uv_bd) 226 | mse_pde = self.pde_loss_conduction(xy_col) 227 | loss = landa*mse_bc + mse_pde 228 | loss.backward() 229 | losse_bc.append(mse_bc.detach().cpu().item()) 230 | losses_pde.append(mse_pde.detach().cpu().item()) 231 | self.optim.step() 232 | if (epoch+1) % 1000 == 0: 233 | print('Epoch: {}, Loss: {}, bc: {}, PDE: {}'.format(epoch, loss.item(),mse_bc.item(),mse_pde.item())) 234 | if loss.detach().cpu().item() <= threshold: 235 | return losse_bc,losses_pde,self.pinn 236 | elif self.PDE == 'Burger': 237 | best_loss = 1000000 238 | best_epoch = 0 239 | for epoch in tqdm(range(EPOCHS)): 240 | self.optim.zero_grad() 241 | mse_bc = self.bc_loss_prediction(xy_bd,uv_bd) 242 | mse_pde = self.pde_loss_burger(xy_col) 243 | loss = landa*mse_bc + mse_pde 244 | loss.backward() 245 | losse_bc.append(mse_bc.detach().cpu().item()) 246 | losses_pde.append(mse_pde.detach().cpu().item()) 247 | self.optim.step() 248 | if loss.item() < best_loss: 249 | best_loss = loss.item() 250 | best_epoch = epoch 251 | if (epoch+1) % 1000 == 0: 252 | print('Epoch: {}, Loss: {}, bc: {}, PDE: {}'.format(epoch, loss.item(),mse_bc.item(),mse_pde.item())) 253 | if loss.detach().cpu().item() <= threshold: 254 | return losse_bc,losses_pde,self.pinn 255 | if loss.detach().cpu().item() >= 1000: 256 | print('Model Diverged') 257 | return losse_bc,losses_pde,self.pinn 258 | 259 | if epoch - best_epoch > 10000: 260 | print('No improvement in 10000 epochs, stopping') 261 | return losse_bc,losses_pde,self.pinn 262 | 263 | else: 264 | raise ValueError("Unsupported PDE type") 265 | 266 | return losse_bc,losses_pde,self.pinn 267 | 268 | def lbfgs_optimizer(self,Epochs,xy_col,xy_bd,uv_bd,outlet_xy=None,num_iterations=1,landa=1,threshold = 0.0001): 269 | ''' 270 | 
Second training loop 271 | If you use proposed method in the article you can keep landa at 1 o.w. tune it 272 | ''' 273 | total_loss = [] 274 | 275 | optimizer = torch.optim.LBFGS(self.pinn.parameters(),lr=self.lr, max_iter=num_iterations) 276 | if self.PDE == 'Navier_Stokes': 277 | def closure(): 278 | optimizer.zero_grad() 279 | mse_bc = self.bc_loss_navier(xy_bd,uv_bd) + self.outlet_loss_navier(outlet_xy) 280 | mse_pde = self.pde_loss_navier(xy_col) 281 | loss = mse_pde + landa*mse_bc 282 | loss.backward() 283 | return loss 284 | 285 | for i in tqdm(range(Epochs)): 286 | loss = optimizer.step(closure) 287 | total_loss.append(loss.detach().cpu().item()) 288 | if loss.detach().cpu().item() <= threshold: 289 | return total_loss,self.pinn 290 | if i % 100 == 0: 291 | print('Epoch: {}, Loss: {}'.format(i, loss.item())) 292 | 293 | elif self.PDE == 'Heat_Conduction': 294 | def closure(): 295 | optimizer.zero_grad() 296 | mse_bc = self.bc_loss_prediction(xy_bd,uv_bd) 297 | mse_pde = self.pde_loss_conduction(xy_col) 298 | loss = mse_pde + landa*mse_bc 299 | loss.backward() 300 | return loss 301 | for i in tqdm(range(Epochs)): 302 | loss = optimizer.step(closure) 303 | total_loss.append(loss.detach().cpu().item()) 304 | if loss.detach().cpu().item() <= threshold: 305 | return total_loss,self.pinn 306 | if i % 100 == 0: 307 | print('Epoch: {}, Loss: {}'.format(i, loss.item())) 308 | elif self.PDE == 'Burger': 309 | def closure(): 310 | optimizer.zero_grad() 311 | mse_bc = self.bc_loss_prediction(xy_bd,uv_bd) 312 | mse_pde = self.pde_loss_burger(xy_col) 313 | loss = mse_pde + landa*mse_bc 314 | loss.backward() 315 | return loss 316 | 317 | for i in tqdm(range(Epochs)): 318 | loss = optimizer.step(closure) 319 | total_loss.append(loss.detach().cpu().item()) 320 | if loss.detach().cpu().item() <= threshold: 321 | return total_loss,self.pinn 322 | if i % 100 == 0: 323 | print('Epoch: {}, Loss: {}'.format(i, loss.item())) 324 | else: 325 | raise ValueError("Unsupported PDE type") 326 | 327 | return total_loss,self.pinn 328 | 329 | def lbfgs_optimizer_inverse(self,Epochs,xy_col,xy_inverse,uv_inverse,xy_bd,uv_bd,outlet_xy,num_iterations=1,landa=1): 330 | total_loss = [] 331 | optimizer = torch.optim.LBFGS(self.pinn.parameters(),lr=self.lr, max_iter=num_iterations) 332 | best_loss = 1000000 333 | best_epoch = 0 334 | def closure(): 335 | optimizer.zero_grad() 336 | mse_bc = self.bc_loss_navier(xy_bd,uv_bd) + self.outlet_loss_navier(outlet_xy) 337 | loss_inverse = self.bc_loss_navier(xy_inverse,uv_inverse) 338 | mse_pde = self.pde_loss_navier(xy_col) 339 | loss = + landa*mse_bc + loss_inverse + mse_pde 340 | loss.backward() 341 | return loss 342 | 343 | for i in tqdm(range(Epochs)): 344 | loss = optimizer.step(closure) 345 | total_loss.append(loss.detach().cpu().item()) 346 | if loss.detach().cpu().item() <= 1e-4: 347 | return total_loss,self.pinn 348 | if i % 100 == 0: 349 | print('Epoch: {}, Loss: {}'.format(i, loss.item())) 350 | if loss.item() < best_loss: 351 | best_loss = loss.item() 352 | best_epoch = i 353 | if i - best_epoch > 10000: 354 | print('No improvement in 10000 epochs, stopping') 355 | return total_loss,self.pinn 356 | if loss.detach().cpu().item() >= 1000: 357 | print('Model Diverged') 358 | return total_loss,self.pinn 359 | return total_loss,self.pinn 360 | 361 | def compelete_train_inversed(self,EPOCHS,xy_col, x_inverse,value_inverse, x_bd, value_bnd): 362 | 'first phase' 363 | 364 | train_loss = [] 365 | epochs = EPOCHS 366 | for epoch in tqdm(range(epochs)): 367 | 
self.optim.zero_grad() 368 | mse_bc = self.bc_loss_navier(x_bd, value_bnd) 369 | mse_pde = self.pde_loss_navier(xy_col) 370 | mse_inverse_ = self.bc_loss_navier(x_inverse,value_inverse) 371 | loss = mse_pde+ mse_bc + mse_inverse_ 372 | loss.backward() 373 | train_loss.append(loss.detach().cpu().item()) 374 | self.optim.step() 375 | if (epoch+1) % 1000 == 0: 376 | print('Epoch: {}, Loss: {}, inverse:{},mse:{}'.format(epoch, loss.item(),mse_inverse_.item(),mse_bc.item())) 377 | 378 | return train_loss,self.pinn -------------------------------------------------------------------------------- /preprocess.py: -------------------------------------------------------------------------------- 1 | from model import * 2 | from training_manager import * 3 | 4 | DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 5 | 6 | class batch_maker: 7 | def __init__(self,cylinder=[0,0,0.1],domain_ub=[1,1],domain_lb=[-1,-1],A=1,Centered=True): 8 | super().__init__() 9 | ''' 10 | A: total amount of domain expansion around the cylinder 11 | A=1 corrsponds to a square that exactly matches the cylinder 12 | ''' 13 | self.centered = Centered 14 | self.x_cyc,self.y_cyc,self.r_cyc= cylinder[0],cylinder[1],cylinder[2] 15 | self.upper_bound_cyl = np.array([self.x_cyc + self.r_cyc, self.y_cyc + self.r_cyc]) 16 | self.lower_bound_cyl = np.array([self.x_cyc - self.r_cyc, self.y_cyc - self.r_cyc]) 17 | self.sampler = qmc.LatinHypercube(d=2, strength=2 ,optimization="random-cd") 18 | self.y_ub,self.y_lb = domain_ub[1],domain_lb[1] 19 | self.x_ub,self.x_lb = domain_ub[0],domain_lb[0] 20 | self.A = A 21 | assert A >= 1 22 | assert self.x_cyc >= 0 23 | assert self.y_cyc >= 0 24 | assert self.r_cyc > 0 25 | assert 2*self.r_cyc < self.y_ub - self.y_lb 26 | #check these methods Sobol', Halton, 27 | def cylinder2d(self,N_cyc): 28 | ''' 29 | Defining a 2D cylinder. 
30 | x_c & y_c shows the center of the cylinder 31 | r: repreasents circle radius 32 | N is the total number of boundary points on cylinder 33 | The default value for x_c,y_c,r = [0,0,0.1] 34 | ''' 35 | assert N_cyc >= 1 36 | teta = 2 * np.random.random(size= N_cyc) - 1 37 | #To make sure these values always exist and not repeated 38 | mask = (teta != 0) & (teta != 0.5) & (teta != 1) & (teta != 1.5) 39 | teta = teta[mask] 40 | teta = np.append(teta,[0,0.5,1,1.5]) 41 | obstacle_x = (self.r_cyc * np.cos(teta*np.pi)+ self.x_cyc).reshape(-1, 1) 42 | obstacle_y = (self.r_cyc * np.sin(teta*np.pi) + self.y_cyc).reshape(-1, 1) 43 | obstacle_xy = np.concatenate([obstacle_x, obstacle_y], axis=1) 44 | return obstacle_xy 45 | 46 | def around_obstacle(self,N_a): 47 | ''' 48 | Defines new points around cylinder 49 | N_a: total points around cyrcle 50 | ''' 51 | around_obstacle_data = self.sampler.random(N_a) 52 | around_obstacle_data = qmc.scale(around_obstacle_data, self.A*self.lower_bound_cyl, self.A*self.upper_bound_cyl) 53 | mask_out_cyl = np.sqrt((around_obstacle_data[:, 0] - self.x_cyc) ** 2 + 54 | (around_obstacle_data[:, 1] - self.y_cyc) ** 2) 55 | around_obstacle_data = around_obstacle_data[mask_out_cyl > self.r_cyc].reshape(-1, 2) 56 | 57 | return around_obstacle_data 58 | 59 | def design_batches(self,number_of_batches, N_b,N,shared_points=False,N_shared_b=0,N_shared =0,N_square =0,S=0.99): 60 | 61 | ''' 62 | Splitting dataset into designed batches 63 | number_of_batches refers to total number of bathces 64 | N_b refers to total number of points in each batch 65 | S is a hyperparameter that excludes boundary points 66 | ''' 67 | y_ub_domain = S*self.y_ub - self.A*self.upper_bound_cyl[1] 68 | y_lb_domain = abs(S*self.y_lb - self.A*self.lower_bound_cyl[1]) 69 | x_ub_domain = S*self.x_ub - self.A*self.upper_bound_cyl[0] 70 | x_lb_domain = abs(S*self.x_lb - self.A*self.lower_bound_cyl[0]) 71 | 72 | splitted_y_ub = y_ub_domain/number_of_batches 73 | splitted_y_lb = y_lb_domain/number_of_batches 74 | splitted_x_ub = x_ub_domain/number_of_batches 75 | splitted_x_lb = x_lb_domain/number_of_batches 76 | 77 | dataset = [] 78 | if shared_points: 79 | shared_data_over_all_batches = [] 80 | if self.centered : 81 | #doesnt work if lower domain is positive 82 | right_u_bound = np.array([S*self.x_ub , S*self.y_ub - splitted_y_ub]) 83 | right_l_bound = np.array([S*self.x_ub - splitted_x_ub, S*self.y_lb + splitted_y_lb]) 84 | left_l_bound = np.array([S*self.x_lb , S*self.y_lb + splitted_y_lb]) 85 | left_u_bound = np.array([S*self.x_lb + splitted_x_lb, S*self.y_ub - splitted_y_ub]) 86 | up_u_bound = np.array([S*self.x_ub , S*self.y_ub]) 87 | up_l_bound = np.array([S*self.x_lb , S*self.y_ub - splitted_y_ub]) 88 | down_u_bound = np.array([S*self.x_ub , S*self.y_lb + splitted_y_lb]) 89 | down_l_bound = np.array([S*self.x_lb, S*self.y_lb ]) 90 | 91 | 92 | for i in range(number_of_batches): 93 | data = self.sampler.random(N_b) 94 | data_ = self.sampler.random(N) 95 | data_r = qmc.scale(data_, right_l_bound, right_u_bound) 96 | data_l = qmc.scale(data_, left_l_bound, left_u_bound) 97 | data_u = qmc.scale(data, up_l_bound, up_u_bound) 98 | data_d = qmc.scale(data, down_l_bound, down_u_bound) 99 | new_data = np.concatenate([data_r, data_l,data_u,data_d], axis=0) 100 | dataset.append(new_data) 101 | 102 | if shared_points: 103 | data = self.sampler.random(N_shared_b) 104 | data_ = self.sampler.random(N_shared) 105 | data_r = qmc.scale(data_, right_l_bound, right_u_bound) 106 | data_l = qmc.scale(data_, left_l_bound, 
left_u_bound) 107 | data_u = qmc.scale(data, up_l_bound, up_u_bound) 108 | data_d = qmc.scale(data, down_l_bound, down_u_bound) 109 | new_data = np.concatenate([data_r, data_l,data_u,data_d], axis=0) 110 | shared_data_over_all_batches.append(new_data) 111 | 112 | right_u_bound[0] -= splitted_x_ub 113 | right_u_bound[1] -= splitted_y_ub 114 | right_l_bound[0] -= splitted_x_ub 115 | right_l_bound[1] += splitted_y_lb 116 | left_u_bound[0] += splitted_x_lb 117 | left_u_bound[1] -= splitted_y_ub 118 | left_l_bound[0] += splitted_x_lb 119 | left_l_bound[1] += splitted_y_lb 120 | up_u_bound[0] -= splitted_x_ub 121 | up_u_bound[1] -= splitted_y_ub 122 | up_l_bound[0] += splitted_x_lb 123 | up_l_bound[1] -= splitted_y_ub 124 | down_u_bound[0] -= splitted_x_ub 125 | down_u_bound[1] += splitted_y_lb 126 | down_l_bound[0] += splitted_x_lb 127 | down_l_bound[1] += splitted_y_lb 128 | else: 129 | right_u_bound = np.array([S*self.x_ub , S*self.y_ub]) 130 | right_l_bound = np.array([S*self.x_ub - splitted_x_ub, S*self.y_lb]) 131 | left_u_bound = np.array([S*self.x_lb + splitted_x_lb, S*self.y_ub]) 132 | left_l_bound = np.array([S*self.x_lb , S*self.y_lb ]) 133 | up_u_bound = np.array([self.A*self.upper_bound_cyl[0], S*self.y_ub]) 134 | up_l_bound = np.array([self.A*self.lower_bound_cyl[0], self.A*self.upper_bound_cyl[1]]) 135 | down_u_bound = np.array([self.A*self.upper_bound_cyl[0], self.A*self.lower_bound_cyl[1]]) 136 | down_l_bound = np.array([self.A*self.lower_bound_cyl[0], S*self.y_lb ]) 137 | data = self.sampler.random(N_b) 138 | data_up = qmc.scale(data, up_l_bound, up_u_bound) 139 | dataset.append(data_up) 140 | data_down = qmc.scale(data, down_l_bound, down_u_bound) 141 | dataset.append(data_down) 142 | for i in range(number_of_batches): 143 | data = self.sampler.random(N_b) 144 | data_right = qmc.scale(data, right_l_bound, right_u_bound) 145 | data_left = qmc.scale(data, left_l_bound, left_u_bound) 146 | dataset.append(data_right) 147 | dataset.append(data_left) 148 | right_l_bound[0] -= splitted_x_ub 149 | right_u_bound[0] -= splitted_x_ub 150 | left_u_bound[0] += splitted_x_lb 151 | left_l_bound[0] += splitted_x_lb 152 | 153 | if shared_points: 154 | square = self.around_obstacle(N_square) 155 | data__ = [] 156 | temp_data_ = np.concatenate(shared_data_over_all_batches[:], axis=0).reshape(-1,2) 157 | for i in range(number_of_batches): 158 | temp_array = np.concatenate([dataset[i], temp_data_,square],axis=0) 159 | temp_array = np.unique(temp_array, axis=0) 160 | data__.append(temp_array) 161 | return data__ 162 | else: 163 | return dataset 164 | 165 | def boundary(self,N_boundary,N_wall,value_u=1,value_v=0,value_outlet=0): 166 | inlet_x = np.zeros((N_boundary, 1)) 167 | inlet_y = np.random.uniform(self.y_lb, self.y_ub, (N_boundary, 1)) 168 | #change ThIS PART LATER 169 | #inlet_u = value_u*np.ones((N_boundary, 1)) 170 | inlet_u = 4 * inlet_y * (0.4 - inlet_y) / (0.4 ** 2) 171 | inlet_v = value_v*np.ones((N_boundary, 1)) 172 | inlet_xy = np.concatenate([inlet_x, inlet_y], axis=1) 173 | inlet_uv = np.concatenate([inlet_u, inlet_v], axis=1) 174 | #prssure=0 in outlet 175 | outlet_xy = np.random.uniform([self.x_ub, self.y_lb], [self.x_ub, self.y_ub], (N_boundary, 2)) 176 | outlet_value = value_outlet*np.ones((N_boundary, 1)) 177 | #walls with no slip conditions 178 | upwall_xy = np.random.uniform([self.x_lb, self.y_ub], [self.x_ub, self.y_ub], (N_wall, 2)) 179 | dnwall_xy = np.random.uniform([self.x_lb, self.y_lb], [self.x_ub, self.y_lb], (N_wall, 2)) 180 | upwall_uv = np.zeros((N_wall, 2)) 181 | 
dnwall_uv = np.zeros((N_wall, 2)) 182 | wall_xy = np.concatenate([upwall_xy, dnwall_xy], axis=0) 183 | wall_uv = np.concatenate([upwall_uv, dnwall_uv], axis=0) 184 | return wall_xy,wall_uv,outlet_xy,outlet_value,inlet_xy,inlet_uv 185 | def dataset(self,*args): 186 | number_of_batches,N_b,S = args[0],args[1],args[2] 187 | number_of_batches,N_b,S = args[0],args[1],args[2] 188 | data_domain = self.design_batches(number_of_batches, N_b,S) 189 | boundary = [] 190 | '''for i in range number_of_batches: 191 | boundary(self,N_boundary,N_wall,value_u=1,value_v=0,value_outlet=0):''' 192 | def plot(self,data,title='title'): 193 | plt.title(title) 194 | plt.scatter(data[:, 0], data[:, 1], s=.2, marker=".", c="r", label="CP") 195 | plt.show() 196 | 197 | def plot1(self, data, title='Title', xlabel='X-axis label', ylabel='Y-axis label'): 198 | fig, ax = plt.subplots(figsize=(6, 6)) 199 | ax.scatter(data[:, 0], data[:, 1], s=2, marker="o", c="red", label="CP") 200 | ax.set_xlabel(xlabel, fontsize=12) 201 | ax.set_ylabel(ylabel, fontsize=12) 202 | ax.set_title(title, fontsize=14) 203 | ax.tick_params(axis='both', which='major', labelsize=10) 204 | ax.grid(True) 205 | plt.tight_layout() 206 | plt.show() 207 | 208 | def pipe_line(neurons_in_layaers,tensor_total_domain,ub,lb,n=10,PDE='Navier_Stokes'): 209 | ''' 210 | Reducing Variance and Preparing Inputs for Training on the Primary Loss Function using the intial state of different neural networks 211 | n is a variance reduction factor. For instance, n = 10 means the variance will be reduced by the square root of ten. 212 | We reduced the variance by averaging out the initial state distribution of n neural networks. 213 | ''' 214 | state_model = [] 215 | total = [] 216 | new_model = {} 217 | LR=None 218 | for _ in range(n): 219 | chosen_points_ = [] 220 | model = PINN_Net(PDE,neurons_in_layaers,ub,lb,nn.Tanh()).to(DEVICE) 221 | state_model.append(model.state_dict()) 222 | trainer = PINNManager(model,LR,PDE) 223 | for j in range(len(tensor_total_domain)): 224 | if PDE == 'Navier_Stokes': 225 | chosen_points_.append(trainer.point_selection_navier(tensor_total_domain[j].to(DEVICE),percent=0.1)) 226 | elif PDE == 'Heat_Conduction': 227 | chosen_points_.append(trainer.point_selection_conduction(tensor_total_domain[j].to(DEVICE),percent=0.1)) 228 | elif PDE == 'Burger': 229 | chosen_points_.append(trainer.point_selection_burger(tensor_total_domain[j].to(DEVICE),percent=0.1)) 230 | else: 231 | raise ValueError("Unsupported PDE type") 232 | total.append(chosen_points_) 233 | del model 234 | del chosen_points_ 235 | for key in state_model[0].keys(): 236 | new_model[key] = 0 237 | 238 | for i in range(len(state_model)): 239 | for key in state_model[0].keys(): 240 | new_model[key]+= state_model[i][key] 241 | 242 | for key in state_model[0].keys(): 243 | new_model[key] /= len(state_model) 244 | 245 | temp = [torch.from_numpy(np.concatenate([total[i][j].detach().cpu().numpy() for i in range(n)], axis=0)) for j in range(len(tensor_total_domain))] 246 | result = torch.cat(temp, dim=0) 247 | result = torch.unique(result, dim=0).to(DEVICE) 248 | 249 | return new_model,result 250 | 251 | def load_smart_weights(complete_model,smart_weight): 252 | ''' 253 | Please note that `copy.deepcopy` is not used, so the previous state will also be modified after the final training. 
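In practice, the returned dictionary starts from `complete_model`'s current (randomly initialized) parameters and then overwrites every key that also exists in `smart_weight`, so the input layer, the shared hidden layers, and the output layer inherit the first-phase weights while the extra hidden layers keep their random initialization.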
254 | 255 | ''' 256 | train_key = list(complete_model.state_dict()) 257 | sub_train_key = list(smart_weight.state_dict()) 258 | state_warmed = [] 259 | state_last = [] 260 | state_warmed.append(smart_weight.state_dict()) 261 | state_last.append(complete_model.state_dict()) 262 | last_state = {} 263 | for key in train_key: 264 | last_state[key] = 0 265 | for key in train_key: 266 | last_state[key] = state_last[0][key] 267 | for key in sub_train_key: 268 | last_state[key] = state_warmed[0][key] 269 | return last_state 270 | 271 | 272 | 273 | def prepare_data(x_min=0.0, x_max=1.0, y_min=0.0, y_max=0.4, r=0.05, xc=0.2, yc=0.2, N_b=200, N_w=400, N_s=200, N_c=40000, N_r=10000): 274 | ub = np.array([x_max, y_max]) 275 | lb = np.array([x_min, y_min]) 276 | 277 | def getData(): 278 | # inlet, v=0 & inlet velocity 279 | inlet_x = np.zeros((N_b, 1)) 280 | inlet_y = np.random.uniform(y_min, y_max, (N_b, 1)) 281 | inlet_u = 4 * inlet_y * (0.4 - inlet_y) / (0.4 ** 2) 282 | inlet_v = np.zeros((N_b, 1)) 283 | inlet_xy = np.concatenate([inlet_x, inlet_y], axis=1) 284 | inlet_uv = np.concatenate([inlet_u, inlet_v], axis=1) 285 | 286 | # outlet, p=0 287 | xy_outlet = np.random.uniform([x_max, y_min], [x_max, y_max], (N_b, 2)) 288 | 289 | # wall, u=v=0 290 | upwall_xy = np.random.uniform([x_min, y_max], [x_max, y_max], (N_w, 2)) 291 | dnwall_xy = np.random.uniform([x_min, y_min], [x_max, y_min], (N_w, 2)) 292 | upwall_uv = np.zeros((N_w, 2)) 293 | dnwall_uv = np.zeros((N_w, 2)) 294 | 295 | # cylinder surface, u=v=0 296 | theta = np.linspace(0.0, 2 * np.pi, N_s) 297 | cyl_x = (r * np.cos(theta) + xc).reshape(-1, 1) 298 | cyl_y = (r * np.sin(theta) + yc).reshape(-1, 1) 299 | cyl_xy = np.concatenate([cyl_x, cyl_y], axis=1) 300 | cyl_uv = np.zeros((N_s, 2)) 301 | 302 | # all boundary except outlet 303 | xy_bnd = np.concatenate([inlet_xy, upwall_xy, dnwall_xy, cyl_xy], axis=0) 304 | uv_bnd = np.concatenate([inlet_uv, upwall_uv, dnwall_uv, cyl_uv], axis=0) 305 | 306 | # Collocation 307 | xy_col = lb + (ub - lb) * lhs(2, N_c) 308 | 309 | # refine points around cylider 310 | refine_ub = np.array([xc + 2 * r, yc + 2 * r]) 311 | refine_lb = np.array([xc - 2 * r, yc - 2 * r]) 312 | 313 | xy_col_refine = refine_lb + (refine_ub - refine_lb) * lhs(2, N_r) 314 | xy_col = np.concatenate([xy_col, xy_col_refine], axis=0) 315 | 316 | # remove collocation points inside the cylinder 317 | dst_from_cyl = np.sqrt((xy_col[:, 0] - xc) ** 2 + (xy_col[:, 1] - yc) ** 2) 318 | xy_col = xy_col[dst_from_cyl > r].reshape(-1, 2) 319 | 320 | # concatenate all xy for collocation 321 | xy_col = np.concatenate((xy_col, xy_bnd, xy_outlet), axis=0) 322 | 323 | # convert to tensor 324 | xy_bnd = torch.tensor(xy_bnd, dtype=torch.float32).to(DEVICE) 325 | uv_bnd = torch.tensor(uv_bnd, dtype=torch.float32).to(DEVICE) 326 | xy_outlet = torch.tensor(xy_outlet, dtype=torch.float32).to(DEVICE) 327 | xy_col = torch.tensor(xy_col, dtype=torch.float32).to(DEVICE) 328 | return xy_col.to(DEVICE), xy_bnd.to(DEVICE), uv_bnd.to(DEVICE), xy_outlet.to(DEVICE) 329 | 330 | return getData() 331 | 332 | 333 | 334 | class InputGenerator_2D_heatconduction: 335 | def __init__(self, num_boundary_conditions=4, num_data_per_condition=100, boundary_values=[265., 300., 400., 273.]): 336 | max_temp = max(boundary_values) 337 | boundary_values = [i/max_temp for i in boundary_values] 338 | self.num_boundary_conditions = num_boundary_conditions 339 | self.num_data_per_condition = num_data_per_condition 340 | self.boundary_values = boundary_values 341 | 342 | def 
generate_data(self): 343 | engine = qmc.LatinHypercube(d=1) 344 | data = np.zeros([self.num_boundary_conditions, self.num_data_per_condition, 3]) 345 | 346 | for i, j in zip(range(self.num_boundary_conditions), [-1, +1, -1, +1]): 347 | points = (engine.random(n=self.num_data_per_condition)[:, 0] - 0.5) * 2 348 | if i < 2: 349 | data[i, :, 0] = j 350 | data[i, :, 1] = points 351 | else: 352 | data[i, :, 0] = points 353 | data[i, :, 1] = j 354 | 355 | for i in range(self.num_boundary_conditions): 356 | data[i, :, 2] = self.boundary_values[i] 357 | 358 | data = data.reshape(self.num_data_per_condition * self.num_boundary_conditions, 3) 359 | return data 360 | 361 | def generate_collocation_points(self, num_collocation_points=40000): 362 | engine = qmc.LatinHypercube(d=2) 363 | collocation_points = engine.random(n=num_collocation_points) 364 | collocation_points = 1.999 * (collocation_points -0.5) 365 | return collocation_points 366 | 367 | 368 | def get_burger_boundaries(bonds=[0,4,0,5],num_samples = 30,time_grid=0.01,boundary_grid=0.0013334): 369 | x_min = bonds[0] 370 | x_max = bonds[1] 371 | T_min = bonds[2] 372 | T_max = bonds[3] 373 | 374 | engine = qmc.LatinHypercube(d=2) 375 | samples = engine.random(n=num_samples) 376 | x_samples = samples[:, 0] * (x_max - x_min) + x_min # x_min = 0, x_max = 4 377 | 378 | T_samples = np.arange(T_min,T_max, time_grid) 379 | X, T = np.meshgrid(x_samples, T_samples) 380 | x = X.reshape(-1, 1) 381 | t = T.reshape(-1, 1) 382 | xt = np.concatenate([x, t], axis=1) 383 | xt_domain = torch.tensor(xt, dtype=torch.float32).to(DEVICE) 384 | x_ = np.arange(x_min,x_max, boundary_grid) 385 | X_initial, T_initial = np.meshgrid(x_, T_min) 386 | x_initial = X_initial.reshape(-1, 1) 387 | t_initial = T_initial.reshape(-1, 1) 388 | xt_initial = np.concatenate([x_initial, t_initial], axis=1) 389 | xt_intial = torch.tensor(xt_initial, dtype=torch.float32).to(DEVICE) 390 | #boundary for different netwroks 391 | # b.c_left 392 | t_bcl = np.arange(T_min,T_max, time_grid) 393 | X_bcl, T_bcl = np.meshgrid(x_min, t_bcl) 394 | x_bcl = X_bcl.reshape(-1, 1) 395 | t_bcl = T_bcl.reshape(-1, 1) 396 | xt_bcl = np.concatenate([x_bcl, t_bcl], axis=1) 397 | xt_bcl = torch.tensor(xt_bcl, dtype=torch.float32).to(DEVICE) 398 | #bc-right 399 | t_bcr = np.arange(T_min,T_max, time_grid) 400 | X_bcr, T_bcr = np.meshgrid(x_max, t_bcr) 401 | x_bcr = X_bcr.reshape(-1, 1) 402 | t_bcr = T_bcr.reshape(-1, 1) 403 | xt_bcr = np.concatenate([x_bcr, t_bcr], axis=1) 404 | xt_bcr = torch.tensor(xt_bcr, dtype=torch.float32).to(DEVICE) 405 | 406 | return xt_domain,xt_intial,xt_bcr,xt_bcl 407 | 408 | def burger_ans(x,t): 409 | nominator = 2*0.01*torch.pi*torch.sin(torch.pi*x)*torch.exp((-torch.pi**2)*(t-5)*0.01) 410 | denominator = 2 + torch.cos(torch.pi*x)*torch.exp((-torch.pi**2)*(t-5)*0.01) 411 | return nominator/denominator -------------------------------------------------------------------------------- /FE-PINN_Example.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from model import * \n", 10 | "from preprocess import * \n", 11 | "from training_manager import *" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 2, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n", 21 | "x_min = 0.0\n", 22 | "x_max = 1.0\n", 23 | "y_min = 
0.0\n", 24 | "y_max = 0.4\n", 25 | "ub = np.array([x_max, y_max])\n", 26 | "lb = np.array([x_min, y_min])\n", 27 | "data_loader = batch_maker(cylinder=[0.2,0.2,0.05],domain_ub=[x_max,y_max],domain_lb=[x_min,y_min],A=1,Centered=True)" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 3, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "#for the first phase\n", 37 | "cyl_xy = data_loader.cylinder2d(300)\n", 38 | "cyl_uv = np.zeros((300+4, 2))\n", 39 | "\n", 40 | "domain_square = data_loader.around_obstacle(1849)\n", 41 | "total_domain = data_loader.design_batches(10,1849,1849)\n", 42 | "total_domain.append(domain_square)\n", 43 | "###############################################################################\n", 44 | "'''\n", 45 | "**Important Note:** The first loss function, referred to as the Primary Loss Function in the article, only includes a subset of boundary conditions. \n", 46 | "Therefore, to construct this loss function, only a subset of boundary conditions should be defined as `xy_bd` and `uv_bd`. \n", 47 | "'''\n", 48 | "###############################################################################\n", 49 | "\n", 50 | "wall_xy,wall_uv,_,_,inlet_xy,inlet_uv = data_loader.boundary(1000, 1000)\n", 51 | "\n", 52 | "# wall_cycl and outlet pressure is exculded from xy_bd\n", 53 | "xy_bd = np.concatenate([wall_xy,inlet_xy], axis=0)\n", 54 | "xy_bd = torch.tensor(xy_bd, dtype=torch.float32).to(DEVICE)\n", 55 | "uv_bd = np.concatenate([wall_uv,inlet_uv], axis=0)\n", 56 | "uv_bd = torch.tensor(uv_bd, dtype=torch.float32).to(DEVICE)\n", 57 | "\n", 58 | "tensor_total_domain = []\n", 59 | "for i in total_domain:\n", 60 | " i = torch.tensor(i, dtype=torch.float32).to(DEVICE)\n", 61 | " tensor_total_domain.append(i)\n" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": 4, 67 | "metadata": {}, 68 | "outputs": [ 69 | { 70 | "name": "stderr", 71 | "output_type": "stream", 72 | "text": [ 73 | " 10%|███████▋ | 1019/10000 [00:07<01:02, 144.70it/s]" 74 | ] 75 | }, 76 | { 77 | "name": "stdout", 78 | "output_type": "stream", 79 | "text": [ 80 | "Epoch: 999, Loss: 0.008030088618397713, bc: 0.0031161224469542503, PDE: 0.004913965705782175\n" 81 | ] 82 | }, 83 | { 84 | "name": "stderr", 85 | "output_type": "stream", 86 | "text": [ 87 | " 20%|███████████████▍ | 2027/10000 [00:14<00:56, 141.38it/s]" 88 | ] 89 | }, 90 | { 91 | "name": "stdout", 92 | "output_type": "stream", 93 | "text": [ 94 | "Epoch: 1999, Loss: 0.0005651228711940348, bc: 0.00021668057888746262, PDE: 0.0003484422923065722\n" 95 | ] 96 | }, 97 | { 98 | "name": "stderr", 99 | "output_type": "stream", 100 | "text": [ 101 | " 30%|███████████████████████ | 3027/10000 [00:20<00:45, 153.22it/s]" 102 | ] 103 | }, 104 | { 105 | "name": "stdout", 106 | "output_type": "stream", 107 | "text": [ 108 | "Epoch: 2999, Loss: 0.00019511919526848942, bc: 5.159979627933353e-05, PDE: 0.0001435193989891559\n" 109 | ] 110 | }, 111 | { 112 | "name": "stderr", 113 | "output_type": "stream", 114 | "text": [ 115 | " 40%|██████████████████████████████▌ | 4019/10000 [00:27<00:39, 151.93it/s]" 116 | ] 117 | }, 118 | { 119 | "name": "stdout", 120 | "output_type": "stream", 121 | "text": [ 122 | "Epoch: 3999, Loss: 8.887232979759574e-05, bc: 2.082361424982082e-05, PDE: 6.804871372878551e-05\n" 123 | ] 124 | }, 125 | { 126 | "name": "stderr", 127 | "output_type": "stream", 128 | "text": [ 129 | " 50%|██████████████████████████████████████▏ | 5028/10000 [00:33<00:33, 149.12it/s]" 130 | ] 131 | }, 132 | { 133 | 
"name": "stdout", 134 | "output_type": "stream", 135 | "text": [ 136 | "Epoch: 4999, Loss: 6.489328370662406e-05, bc: 1.5011287359811831e-05, PDE: 4.9881993618328124e-05\n" 137 | ] 138 | }, 139 | { 140 | "name": "stderr", 141 | "output_type": "stream", 142 | "text": [ 143 | " 60%|█████████████████████████████████████████████▊ | 6028/10000 [00:40<00:26, 149.59it/s]" 144 | ] 145 | }, 146 | { 147 | "name": "stdout", 148 | "output_type": "stream", 149 | "text": [ 150 | "Epoch: 5999, Loss: 3.8179590774234384e-05, bc: 1.1427870049374178e-05, PDE: 2.6751720724860206e-05\n" 151 | ] 152 | }, 153 | { 154 | "name": "stderr", 155 | "output_type": "stream", 156 | "text": [ 157 | " 70%|█████████████████████████████████████████████████████▎ | 7023/10000 [00:47<00:19, 152.65it/s]" 158 | ] 159 | }, 160 | { 161 | "name": "stdout", 162 | "output_type": "stream", 163 | "text": [ 164 | "Epoch: 6999, Loss: 4.317411730880849e-05, bc: 1.392912872688612e-05, PDE: 2.924498767242767e-05\n" 165 | ] 166 | }, 167 | { 168 | "name": "stderr", 169 | "output_type": "stream", 170 | "text": [ 171 | " 80%|████████████████████████████████████████████████████████████▉ | 8015/10000 [00:53<00:12, 155.57it/s]" 172 | ] 173 | }, 174 | { 175 | "name": "stdout", 176 | "output_type": "stream", 177 | "text": [ 178 | "Epoch: 7999, Loss: 2.6806188543559983e-05, bc: 8.74093166203238e-06, PDE: 1.8065256881527603e-05\n" 179 | ] 180 | }, 181 | { 182 | "name": "stderr", 183 | "output_type": "stream", 184 | "text": [ 185 | " 90%|████████████████████████████████████████████████████████████████████▌ | 9027/10000 [01:00<00:06, 151.41it/s]" 186 | ] 187 | }, 188 | { 189 | "name": "stdout", 190 | "output_type": "stream", 191 | "text": [ 192 | "Epoch: 8999, Loss: 2.070401023956947e-05, bc: 7.424153409374412e-06, PDE: 1.327985773968976e-05\n" 193 | ] 194 | }, 195 | { 196 | "name": "stderr", 197 | "output_type": "stream", 198 | "text": [ 199 | "100%|███████████████████████████████████████████████████████████████████████████| 10000/10000 [01:06<00:00, 150.24it/s]" 200 | ] 201 | }, 202 | { 203 | "name": "stdout", 204 | "output_type": "stream", 205 | "text": [ 206 | "Epoch: 9999, Loss: 1.800814425223507e-05, bc: 6.653247510257643e-06, PDE: 1.1354895832482725e-05\n" 207 | ] 208 | }, 209 | { 210 | "name": "stderr", 211 | "output_type": "stream", 212 | "text": [ 213 | "\n" 214 | ] 215 | } 216 | ], 217 | "source": [ 218 | "EPOCHS = 10000\n", 219 | "#variance reduction and preparing domain points using the inital weight state of different neural networks\n", 220 | "new_model,result = pipe_line([40,40,40,40],tensor_total_domain,ub,lb,n=10,PDE='Navier_Stokes')\n", 221 | "#create the model\n", 222 | "model_pipe = PINN_Net('Navier_Stokes', [40,40,40,40],ub,lb,nn.Tanh()).to(DEVICE)\n", 223 | "# load the initial state with reduced variance\n", 224 | "model_pipe.load_state_dict(new_model)\n", 225 | "# First phase of train\n", 226 | "trainer_pipe = PINNManager(model_pipe.to(DEVICE),3e-4,'Navier_Stokes')\n", 227 | "losse_bc,losses_pde,model_pipe_trained = trainer_pipe.adam_optimizer(EPOCHS,result,xy_bd,uv_bd)" 228 | ] 229 | }, 230 | { 231 | "cell_type": "markdown", 232 | "metadata": {}, 233 | "source": [ 234 | "- Replacing a proportion of random weights with `smart weights` (the weights created from the previous phase of training) \n", 235 | "in the output layer and the first few layers." 
236 | ] 237 | }, 238 | { 239 | "cell_type": "code", 240 | "execution_count": 5, 241 | "metadata": {}, 242 | "outputs": [ 243 | { 244 | "data": { 245 | "text/plain": [ 246 | "" 247 | ] 248 | }, 249 | "execution_count": 5, 250 | "metadata": {}, 251 | "output_type": "execute_result" 252 | } 253 | ], 254 | "source": [ 255 | "#Increasing complexity by adding random weights to smart weights\n", 256 | "last_model = PINN_Net('Navier_Stokes', [40,40,40,40,40,40,40,40,40,40],ub,lb,nn.Tanh()).to(DEVICE)\n", 257 | "last_model_updated_state = load_smart_weights(last_model,model_pipe_trained) \n", 258 | "last_model.load_state_dict(last_model_updated_state)\n" 259 | ] 260 | }, 261 | { 262 | "cell_type": "code", 263 | "execution_count": 6, 264 | "metadata": {}, 265 | "outputs": [ 266 | { 267 | "name": "stderr", 268 | "output_type": "stream", 269 | "text": [ 270 | " 0%| | 6/400000 [00:00<2:13:57, 49.77it/s]" 271 | ] 272 | }, 273 | { 274 | "name": "stdout", 275 | "output_type": "stream", 276 | "text": [ 277 | "Epoch: 0, Loss: 2.1202948093414307\n" 278 | ] 279 | }, 280 | { 281 | "name": "stderr", 282 | "output_type": "stream", 283 | "text": [ 284 | " 0%| | 107/400000 [00:02<3:18:38, 33.55it/s]" 285 | ] 286 | }, 287 | { 288 | "name": "stdout", 289 | "output_type": "stream", 290 | "text": [ 291 | "Epoch: 100, Loss: 0.04013194143772125\n" 292 | ] 293 | }, 294 | { 295 | "name": "stderr", 296 | "output_type": "stream", 297 | "text": [ 298 | " 0%| | 207/400000 [00:05<3:21:34, 33.06it/s]" 299 | ] 300 | }, 301 | { 302 | "name": "stdout", 303 | "output_type": "stream", 304 | "text": [ 305 | "Epoch: 200, Loss: 0.03408331423997879\n" 306 | ] 307 | }, 308 | { 309 | "name": "stderr", 310 | "output_type": "stream", 311 | "text": [ 312 | " 0%| | 307/400000 [00:08<3:20:48, 33.17it/s]" 313 | ] 314 | }, 315 | { 316 | "name": "stdout", 317 | "output_type": "stream", 318 | "text": [ 319 | "Epoch: 300, Loss: 0.03214644268155098\n" 320 | ] 321 | }, 322 | { 323 | "name": "stderr", 324 | "output_type": "stream", 325 | "text": [ 326 | " 0%| | 407/400000 [00:11<3:25:31, 32.41it/s]" 327 | ] 328 | }, 329 | { 330 | "name": "stdout", 331 | "output_type": "stream", 332 | "text": [ 333 | "Epoch: 400, Loss: 0.030459372326731682\n" 334 | ] 335 | }, 336 | { 337 | "name": "stderr", 338 | "output_type": "stream", 339 | "text": [ 340 | " 0%| | 507/400000 [00:14<3:20:43, 33.17it/s]" 341 | ] 342 | }, 343 | { 344 | "name": "stdout", 345 | "output_type": "stream", 346 | "text": [ 347 | "Epoch: 500, Loss: 0.02844630926847458\n" 348 | ] 349 | }, 350 | { 351 | "name": "stderr", 352 | "output_type": "stream", 353 | "text": [ 354 | " 0%| | 607/400000 [00:17<3:20:17, 33.24it/s]" 355 | ] 356 | }, 357 | { 358 | "name": "stdout", 359 | "output_type": "stream", 360 | "text": [ 361 | "Epoch: 600, Loss: 0.02671224996447563\n" 362 | ] 363 | }, 364 | { 365 | "name": "stderr", 366 | "output_type": "stream", 367 | "text": [ 368 | " 0%|▏ | 707/400000 [00:20<3:19:29, 33.36it/s]" 369 | ] 370 | }, 371 | { 372 | "name": "stdout", 373 | "output_type": "stream", 374 | "text": [ 375 | "Epoch: 700, Loss: 0.02534942515194416\n" 376 | ] 377 | }, 378 | { 379 | "name": "stderr", 380 | "output_type": "stream", 381 | "text": [ 382 | " 0%|▏ | 807/400000 [00:23<3:17:06, 33.75it/s]" 383 | ] 384 | }, 385 | { 386 | "name": "stdout", 387 | "output_type": "stream", 388 | "text": [ 389 | "Epoch: 800, Loss: 0.023899316787719727\n" 390 | ] 391 | }, 392 | { 393 | "name": "stderr", 394 | "output_type": "stream", 395 | "text": [ 396 | " 0%|▏ | 907/400000 [00:26<3:19:34, 33.33it/s]" 397 | ] 
[FE-PINN_Example.ipynb training output, condensed: the notebook's alternating stderr (tqdm progress bar) and stdout stream cells report the loss every 100 epochs while training toward 400,000 epochs at roughly 27-35 it/s. Over the span originally shown here (~12.5 minutes of wall-clock time) the loss decreases steadily; representative values:]

Epoch:   900, Loss: 0.0231
Epoch:  5000, Loss: 0.0188
Epoch: 10000, Loss: 0.0105
Epoch: 15000, Loss: 0.0022
Epoch: 20000, Loss: 0.00065
Epoch: 24700, Loss: 0.00034
"output_type": "stream", 3748 | "text": [ 3749 | "Epoch: 24800, Loss: 0.00033797218929976225\n" 3750 | ] 3751 | }, 3752 | { 3753 | "name": "stderr", 3754 | "output_type": "stream", 3755 | "text": [ 3756 | " 6%|████▌ | 24906/400000 [12:31<3:01:40, 34.41it/s]" 3757 | ] 3758 | }, 3759 | { 3760 | "name": "stdout", 3761 | "output_type": "stream", 3762 | "text": [ 3763 | "Epoch: 24900, Loss: 0.00033373921178281307\n" 3764 | ] 3765 | }, 3766 | { 3767 | "name": "stderr", 3768 | "output_type": "stream", 3769 | "text": [ 3770 | " 6%|████▌ | 25006/400000 [12:34<3:02:09, 34.31it/s]" 3771 | ] 3772 | }, 3773 | { 3774 | "name": "stdout", 3775 | "output_type": "stream", 3776 | "text": [ 3777 | "Epoch: 25000, Loss: 0.0003301659889984876\n" 3778 | ] 3779 | }, 3780 | { 3781 | "name": "stderr", 3782 | "output_type": "stream", 3783 | "text": [ 3784 | " 6%|████▌ | 25107/400000 [12:37<3:05:34, 33.67it/s]" 3785 | ] 3786 | }, 3787 | { 3788 | "name": "stdout", 3789 | "output_type": "stream", 3790 | "text": [ 3791 | "Epoch: 25100, Loss: 0.0003266967833042145\n" 3792 | ] 3793 | }, 3794 | { 3795 | "name": "stderr", 3796 | "output_type": "stream", 3797 | "text": [ 3798 | " 6%|████▌ | 25204/400000 [12:39<3:09:00, 33.05it/s]" 3799 | ] 3800 | }, 3801 | { 3802 | "name": "stdout", 3803 | "output_type": "stream", 3804 | "text": [ 3805 | "Epoch: 25200, Loss: 0.0003233618044760078\n" 3806 | ] 3807 | }, 3808 | { 3809 | "name": "stderr", 3810 | "output_type": "stream", 3811 | "text": [ 3812 | " 6%|████▌ | 25305/400000 [12:43<3:57:55, 26.25it/s]" 3813 | ] 3814 | }, 3815 | { 3816 | "name": "stdout", 3817 | "output_type": "stream", 3818 | "text": [ 3819 | "Epoch: 25300, Loss: 0.0003193989978171885\n" 3820 | ] 3821 | }, 3822 | { 3823 | "name": "stderr", 3824 | "output_type": "stream", 3825 | "text": [ 3826 | " 6%|████▋ | 25405/400000 [12:47<3:53:51, 26.70it/s]" 3827 | ] 3828 | }, 3829 | { 3830 | "name": "stdout", 3831 | "output_type": "stream", 3832 | "text": [ 3833 | "Epoch: 25400, Loss: 0.00031529695843346417\n" 3834 | ] 3835 | }, 3836 | { 3837 | "name": "stderr", 3838 | "output_type": "stream", 3839 | "text": [ 3840 | " 6%|████▋ | 25504/400000 [12:50<3:49:43, 27.17it/s]" 3841 | ] 3842 | }, 3843 | { 3844 | "name": "stdout", 3845 | "output_type": "stream", 3846 | "text": [ 3847 | "Epoch: 25500, Loss: 0.0003118431195616722\n" 3848 | ] 3849 | }, 3850 | { 3851 | "name": "stderr", 3852 | "output_type": "stream", 3853 | "text": [ 3854 | " 6%|████▋ | 25607/400000 [12:54<3:16:17, 31.79it/s]" 3855 | ] 3856 | }, 3857 | { 3858 | "name": "stdout", 3859 | "output_type": "stream", 3860 | "text": [ 3861 | "Epoch: 25600, Loss: 0.0003082235052715987\n" 3862 | ] 3863 | }, 3864 | { 3865 | "name": "stderr", 3866 | "output_type": "stream", 3867 | "text": [ 3868 | " 6%|████▋ | 25707/400000 [12:57<3:02:29, 34.18it/s]" 3869 | ] 3870 | }, 3871 | { 3872 | "name": "stdout", 3873 | "output_type": "stream", 3874 | "text": [ 3875 | "Epoch: 25700, Loss: 0.00030471227364614606\n" 3876 | ] 3877 | }, 3878 | { 3879 | "name": "stderr", 3880 | "output_type": "stream", 3881 | "text": [ 3882 | " 6%|████▋ | 25807/400000 [13:00<2:58:14, 34.99it/s]" 3883 | ] 3884 | }, 3885 | { 3886 | "name": "stdout", 3887 | "output_type": "stream", 3888 | "text": [ 3889 | "Epoch: 25800, Loss: 0.0003009273495990783\n" 3890 | ] 3891 | }, 3892 | { 3893 | "name": "stderr", 3894 | "output_type": "stream", 3895 | "text": [ 3896 | " 6%|████▋ | 25907/400000 [13:03<3:02:31, 34.16it/s]" 3897 | ] 3898 | }, 3899 | { 3900 | "name": "stdout", 3901 | "output_type": "stream", 3902 | "text": [ 3903 | 
"Epoch: 25900, Loss: 0.00029735040152445436\n" 3904 | ] 3905 | }, 3906 | { 3907 | "name": "stderr", 3908 | "output_type": "stream", 3909 | "text": [ 3910 | " 7%|████▋ | 26007/400000 [13:06<3:07:01, 33.33it/s]" 3911 | ] 3912 | }, 3913 | { 3914 | "name": "stdout", 3915 | "output_type": "stream", 3916 | "text": [ 3917 | "Epoch: 26000, Loss: 0.00029319716850295663\n" 3918 | ] 3919 | }, 3920 | { 3921 | "name": "stderr", 3922 | "output_type": "stream", 3923 | "text": [ 3924 | " 7%|████▊ | 26107/400000 [13:09<3:10:42, 32.68it/s]" 3925 | ] 3926 | }, 3927 | { 3928 | "name": "stdout", 3929 | "output_type": "stream", 3930 | "text": [ 3931 | "Epoch: 26100, Loss: 0.0002895935031119734\n" 3932 | ] 3933 | }, 3934 | { 3935 | "name": "stderr", 3936 | "output_type": "stream", 3937 | "text": [ 3938 | " 7%|████▊ | 26207/400000 [13:12<3:08:58, 32.97it/s]" 3939 | ] 3940 | }, 3941 | { 3942 | "name": "stdout", 3943 | "output_type": "stream", 3944 | "text": [ 3945 | "Epoch: 26200, Loss: 0.00028659310191869736\n" 3946 | ] 3947 | }, 3948 | { 3949 | "name": "stderr", 3950 | "output_type": "stream", 3951 | "text": [ 3952 | " 7%|████▊ | 26307/400000 [13:15<3:04:18, 33.79it/s]" 3953 | ] 3954 | }, 3955 | { 3956 | "name": "stdout", 3957 | "output_type": "stream", 3958 | "text": [ 3959 | "Epoch: 26300, Loss: 0.0002832940372172743\n" 3960 | ] 3961 | }, 3962 | { 3963 | "name": "stderr", 3964 | "output_type": "stream", 3965 | "text": [ 3966 | " 7%|████▊ | 26407/400000 [13:18<3:04:52, 33.68it/s]" 3967 | ] 3968 | }, 3969 | { 3970 | "name": "stdout", 3971 | "output_type": "stream", 3972 | "text": [ 3973 | "Epoch: 26400, Loss: 0.000279638305073604\n" 3974 | ] 3975 | }, 3976 | { 3977 | "name": "stderr", 3978 | "output_type": "stream", 3979 | "text": [ 3980 | " 7%|████▊ | 26507/400000 [13:21<3:10:51, 32.62it/s]" 3981 | ] 3982 | }, 3983 | { 3984 | "name": "stdout", 3985 | "output_type": "stream", 3986 | "text": [ 3987 | "Epoch: 26500, Loss: 0.0002765277458820492\n" 3988 | ] 3989 | }, 3990 | { 3991 | "name": "stderr", 3992 | "output_type": "stream", 3993 | "text": [ 3994 | " 7%|████▊ | 26607/400000 [13:24<3:12:24, 32.34it/s]" 3995 | ] 3996 | }, 3997 | { 3998 | "name": "stdout", 3999 | "output_type": "stream", 4000 | "text": [ 4001 | "Epoch: 26600, Loss: 0.00027344291447661817\n" 4002 | ] 4003 | }, 4004 | { 4005 | "name": "stderr", 4006 | "output_type": "stream", 4007 | "text": [ 4008 | " 7%|████▊ | 26707/400000 [13:27<3:07:07, 33.25it/s]" 4009 | ] 4010 | }, 4011 | { 4012 | "name": "stdout", 4013 | "output_type": "stream", 4014 | "text": [ 4015 | "Epoch: 26700, Loss: 0.0002701134653761983\n" 4016 | ] 4017 | }, 4018 | { 4019 | "name": "stderr", 4020 | "output_type": "stream", 4021 | "text": [ 4022 | " 7%|████▉ | 26807/400000 [13:30<3:12:48, 32.26it/s]" 4023 | ] 4024 | }, 4025 | { 4026 | "name": "stdout", 4027 | "output_type": "stream", 4028 | "text": [ 4029 | "Epoch: 26800, Loss: 0.0002670060202945024\n" 4030 | ] 4031 | }, 4032 | { 4033 | "name": "stderr", 4034 | "output_type": "stream", 4035 | "text": [ 4036 | " 7%|████▉ | 26907/400000 [13:33<3:03:26, 33.90it/s]" 4037 | ] 4038 | }, 4039 | { 4040 | "name": "stdout", 4041 | "output_type": "stream", 4042 | "text": [ 4043 | "Epoch: 26900, Loss: 0.0002644372289068997\n" 4044 | ] 4045 | }, 4046 | { 4047 | "name": "stderr", 4048 | "output_type": "stream", 4049 | "text": [ 4050 | " 7%|████▉ | 27007/400000 [13:36<3:05:44, 33.47it/s]" 4051 | ] 4052 | }, 4053 | { 4054 | "name": "stdout", 4055 | "output_type": "stream", 4056 | "text": [ 4057 | "Epoch: 27000, Loss: 0.00026146130403503776\n" 4058 | 
] 4059 | }, 4060 | { 4061 | "name": "stderr", 4062 | "output_type": "stream", 4063 | "text": [ 4064 | " 7%|████▉ | 27107/400000 [13:39<3:05:10, 33.56it/s]" 4065 | ] 4066 | }, 4067 | { 4068 | "name": "stdout", 4069 | "output_type": "stream", 4070 | "text": [ 4071 | "Epoch: 27100, Loss: 0.00025845831260085106\n" 4072 | ] 4073 | }, 4074 | { 4075 | "name": "stderr", 4076 | "output_type": "stream", 4077 | "text": [ 4078 | " 7%|████▉ | 27207/400000 [13:42<3:08:00, 33.05it/s]" 4079 | ] 4080 | }, 4081 | { 4082 | "name": "stdout", 4083 | "output_type": "stream", 4084 | "text": [ 4085 | "Epoch: 27200, Loss: 0.00025546911638230085\n" 4086 | ] 4087 | }, 4088 | { 4089 | "name": "stderr", 4090 | "output_type": "stream", 4091 | "text": [ 4092 | " 7%|████▉ | 27307/400000 [13:45<3:07:25, 33.14it/s]" 4093 | ] 4094 | }, 4095 | { 4096 | "name": "stdout", 4097 | "output_type": "stream", 4098 | "text": [ 4099 | "Epoch: 27300, Loss: 0.0002523384755477309\n" 4100 | ] 4101 | }, 4102 | { 4103 | "name": "stderr", 4104 | "output_type": "stream", 4105 | "text": [ 4106 | " 7%|█████ | 27407/400000 [13:48<3:09:03, 32.85it/s]" 4107 | ] 4108 | }, 4109 | { 4110 | "name": "stdout", 4111 | "output_type": "stream", 4112 | "text": [ 4113 | "Epoch: 27400, Loss: 0.00024967093486338854\n" 4114 | ] 4115 | }, 4116 | { 4117 | "name": "stderr", 4118 | "output_type": "stream", 4119 | "text": [ 4120 | " 7%|█████ | 27507/400000 [13:51<3:06:09, 33.35it/s]" 4121 | ] 4122 | }, 4123 | { 4124 | "name": "stdout", 4125 | "output_type": "stream", 4126 | "text": [ 4127 | "Epoch: 27500, Loss: 0.0002472566266078502\n" 4128 | ] 4129 | }, 4130 | { 4131 | "name": "stderr", 4132 | "output_type": "stream", 4133 | "text": [ 4134 | " 7%|█████ | 27607/400000 [13:54<3:11:22, 32.43it/s]" 4135 | ] 4136 | }, 4137 | { 4138 | "name": "stdout", 4139 | "output_type": "stream", 4140 | "text": [ 4141 | "Epoch: 27600, Loss: 0.00024457360268570483\n" 4142 | ] 4143 | }, 4144 | { 4145 | "name": "stderr", 4146 | "output_type": "stream", 4147 | "text": [ 4148 | " 7%|█████ | 27707/400000 [13:57<3:08:56, 32.84it/s]" 4149 | ] 4150 | }, 4151 | { 4152 | "name": "stdout", 4153 | "output_type": "stream", 4154 | "text": [ 4155 | "Epoch: 27700, Loss: 0.00024241743085440248\n" 4156 | ] 4157 | }, 4158 | { 4159 | "name": "stderr", 4160 | "output_type": "stream", 4161 | "text": [ 4162 | " 7%|█████ | 27807/400000 [14:00<3:01:38, 34.15it/s]" 4163 | ] 4164 | }, 4165 | { 4166 | "name": "stdout", 4167 | "output_type": "stream", 4168 | "text": [ 4169 | "Epoch: 27800, Loss: 0.0002402394311502576\n" 4170 | ] 4171 | }, 4172 | { 4173 | "name": "stderr", 4174 | "output_type": "stream", 4175 | "text": [ 4176 | " 7%|█████ | 27907/400000 [14:03<3:03:22, 33.82it/s]" 4177 | ] 4178 | }, 4179 | { 4180 | "name": "stdout", 4181 | "output_type": "stream", 4182 | "text": [ 4183 | "Epoch: 27900, Loss: 0.00023749018146190792\n" 4184 | ] 4185 | }, 4186 | { 4187 | "name": "stderr", 4188 | "output_type": "stream", 4189 | "text": [ 4190 | " 7%|█████ | 28007/400000 [14:06<3:02:29, 33.97it/s]" 4191 | ] 4192 | }, 4193 | { 4194 | "name": "stdout", 4195 | "output_type": "stream", 4196 | "text": [ 4197 | "Epoch: 28000, Loss: 0.0002355445030843839\n" 4198 | ] 4199 | }, 4200 | { 4201 | "name": "stderr", 4202 | "output_type": "stream", 4203 | "text": [ 4204 | " 7%|█████▏ | 28107/400000 [14:09<3:07:42, 33.02it/s]" 4205 | ] 4206 | }, 4207 | { 4208 | "name": "stdout", 4209 | "output_type": "stream", 4210 | "text": [ 4211 | "Epoch: 28100, Loss: 0.0002333974844077602\n" 4212 | ] 4213 | }, 4214 | { 4215 | "name": "stderr", 
4216 | "output_type": "stream", 4217 | "text": [ 4218 | " 7%|█████▏ | 28207/400000 [14:12<3:07:40, 33.02it/s]" 4219 | ] 4220 | }, 4221 | { 4222 | "name": "stdout", 4223 | "output_type": "stream", 4224 | "text": [ 4225 | "Epoch: 28200, Loss: 0.00023147545289248228\n" 4226 | ] 4227 | }, 4228 | { 4229 | "name": "stderr", 4230 | "output_type": "stream", 4231 | "text": [ 4232 | " 7%|█████▏ | 28307/400000 [14:15<3:06:11, 33.27it/s]" 4233 | ] 4234 | }, 4235 | { 4236 | "name": "stdout", 4237 | "output_type": "stream", 4238 | "text": [ 4239 | "Epoch: 28300, Loss: 0.00022948614787310362\n" 4240 | ] 4241 | }, 4242 | { 4243 | "name": "stderr", 4244 | "output_type": "stream", 4245 | "text": [ 4246 | " 7%|█████▏ | 28407/400000 [14:18<3:04:00, 33.66it/s]" 4247 | ] 4248 | }, 4249 | { 4250 | "name": "stdout", 4251 | "output_type": "stream", 4252 | "text": [ 4253 | "Epoch: 28400, Loss: 0.00022737924882676452\n" 4254 | ] 4255 | }, 4256 | { 4257 | "name": "stderr", 4258 | "output_type": "stream", 4259 | "text": [ 4260 | " 7%|█████▏ | 28507/400000 [14:21<3:08:04, 32.92it/s]" 4261 | ] 4262 | }, 4263 | { 4264 | "name": "stdout", 4265 | "output_type": "stream", 4266 | "text": [ 4267 | "Epoch: 28500, Loss: 0.00022542172519024462\n" 4268 | ] 4269 | }, 4270 | { 4271 | "name": "stderr", 4272 | "output_type": "stream", 4273 | "text": [ 4274 | " 7%|█████▏ | 28607/400000 [14:24<3:10:46, 32.45it/s]" 4275 | ] 4276 | }, 4277 | { 4278 | "name": "stdout", 4279 | "output_type": "stream", 4280 | "text": [ 4281 | "Epoch: 28600, Loss: 0.00022357411216944456\n" 4282 | ] 4283 | }, 4284 | { 4285 | "name": "stderr", 4286 | "output_type": "stream", 4287 | "text": [ 4288 | " 7%|█████▏ | 28707/400000 [14:27<3:12:19, 32.18it/s]" 4289 | ] 4290 | }, 4291 | { 4292 | "name": "stdout", 4293 | "output_type": "stream", 4294 | "text": [ 4295 | "Epoch: 28700, Loss: 0.0002213203115388751\n" 4296 | ] 4297 | }, 4298 | { 4299 | "name": "stderr", 4300 | "output_type": "stream", 4301 | "text": [ 4302 | " 7%|█████▎ | 28807/400000 [14:30<3:03:14, 33.76it/s]" 4303 | ] 4304 | }, 4305 | { 4306 | "name": "stdout", 4307 | "output_type": "stream", 4308 | "text": [ 4309 | "Epoch: 28800, Loss: 0.000219358887989074\n" 4310 | ] 4311 | }, 4312 | { 4313 | "name": "stderr", 4314 | "output_type": "stream", 4315 | "text": [ 4316 | " 7%|█████▎ | 28907/400000 [14:33<3:03:07, 33.78it/s]" 4317 | ] 4318 | }, 4319 | { 4320 | "name": "stdout", 4321 | "output_type": "stream", 4322 | "text": [ 4323 | "Epoch: 28900, Loss: 0.00021758840011898428\n" 4324 | ] 4325 | }, 4326 | { 4327 | "name": "stderr", 4328 | "output_type": "stream", 4329 | "text": [ 4330 | " 7%|█████▎ | 29007/400000 [14:36<3:06:00, 33.24it/s]" 4331 | ] 4332 | }, 4333 | { 4334 | "name": "stdout", 4335 | "output_type": "stream", 4336 | "text": [ 4337 | "Epoch: 29000, Loss: 0.00021555593411903828\n" 4338 | ] 4339 | }, 4340 | { 4341 | "name": "stderr", 4342 | "output_type": "stream", 4343 | "text": [ 4344 | " 7%|█████▎ | 29107/400000 [14:39<3:07:24, 32.99it/s]" 4345 | ] 4346 | }, 4347 | { 4348 | "name": "stdout", 4349 | "output_type": "stream", 4350 | "text": [ 4351 | "Epoch: 29100, Loss: 0.00021375558571889997\n" 4352 | ] 4353 | }, 4354 | { 4355 | "name": "stderr", 4356 | "output_type": "stream", 4357 | "text": [ 4358 | " 7%|█████▎ | 29207/400000 [14:42<3:10:04, 32.51it/s]" 4359 | ] 4360 | }, 4361 | { 4362 | "name": "stdout", 4363 | "output_type": "stream", 4364 | "text": [ 4365 | "Epoch: 29200, Loss: 0.00021195950102992356\n" 4366 | ] 4367 | }, 4368 | { 4369 | "name": "stderr", 4370 | "output_type": "stream", 4371 
| "text": [ 4372 | " 7%|█████▎ | 29305/400000 [14:45<3:20:15, 30.85it/s]" 4373 | ] 4374 | }, 4375 | { 4376 | "name": "stdout", 4377 | "output_type": "stream", 4378 | "text": [ 4379 | "Epoch: 29300, Loss: 0.00021044259483460337\n" 4380 | ] 4381 | }, 4382 | { 4383 | "name": "stderr", 4384 | "output_type": "stream", 4385 | "text": [ 4386 | " 7%|█████▎ | 29405/400000 [14:48<3:12:19, 32.12it/s]" 4387 | ] 4388 | }, 4389 | { 4390 | "name": "stdout", 4391 | "output_type": "stream", 4392 | "text": [ 4393 | "Epoch: 29400, Loss: 0.00020882011449430138\n" 4394 | ] 4395 | }, 4396 | { 4397 | "name": "stderr", 4398 | "output_type": "stream", 4399 | "text": [ 4400 | " 7%|█████▍ | 29505/400000 [14:52<3:08:37, 32.74it/s]" 4401 | ] 4402 | }, 4403 | { 4404 | "name": "stdout", 4405 | "output_type": "stream", 4406 | "text": [ 4407 | "Epoch: 29500, Loss: 0.00020713337289635092\n" 4408 | ] 4409 | }, 4410 | { 4411 | "name": "stderr", 4412 | "output_type": "stream", 4413 | "text": [ 4414 | " 7%|█████▍ | 29605/400000 [14:55<3:03:43, 33.60it/s]" 4415 | ] 4416 | }, 4417 | { 4418 | "name": "stdout", 4419 | "output_type": "stream", 4420 | "text": [ 4421 | "Epoch: 29600, Loss: 0.00020554526417981833\n" 4422 | ] 4423 | }, 4424 | { 4425 | "name": "stderr", 4426 | "output_type": "stream", 4427 | "text": [ 4428 | " 7%|█████▍ | 29705/400000 [14:58<3:09:55, 32.50it/s]" 4429 | ] 4430 | }, 4431 | { 4432 | "name": "stdout", 4433 | "output_type": "stream", 4434 | "text": [ 4435 | "Epoch: 29700, Loss: 0.00020415615290403366\n" 4436 | ] 4437 | }, 4438 | { 4439 | "name": "stderr", 4440 | "output_type": "stream", 4441 | "text": [ 4442 | " 7%|█████▍ | 29805/400000 [15:01<3:05:26, 33.27it/s]" 4443 | ] 4444 | }, 4445 | { 4446 | "name": "stdout", 4447 | "output_type": "stream", 4448 | "text": [ 4449 | "Epoch: 29800, Loss: 0.00020253889670129865\n" 4450 | ] 4451 | }, 4452 | { 4453 | "name": "stderr", 4454 | "output_type": "stream", 4455 | "text": [ 4456 | " 7%|█████▍ | 29905/400000 [15:04<3:01:36, 33.96it/s]" 4457 | ] 4458 | }, 4459 | { 4460 | "name": "stdout", 4461 | "output_type": "stream", 4462 | "text": [ 4463 | "Epoch: 29900, Loss: 0.00020104310533497483\n" 4464 | ] 4465 | }, 4466 | { 4467 | "name": "stderr", 4468 | "output_type": "stream", 4469 | "text": [ 4470 | " 8%|█████▍ | 30005/400000 [15:07<3:08:36, 32.70it/s]" 4471 | ] 4472 | }, 4473 | { 4474 | "name": "stdout", 4475 | "output_type": "stream", 4476 | "text": [ 4477 | "Epoch: 30000, Loss: 0.00019965277169831097\n" 4478 | ] 4479 | }, 4480 | { 4481 | "name": "stderr", 4482 | "output_type": "stream", 4483 | "text": [ 4484 | " 8%|█████▍ | 30105/400000 [15:10<3:02:58, 33.69it/s]" 4485 | ] 4486 | }, 4487 | { 4488 | "name": "stdout", 4489 | "output_type": "stream", 4490 | "text": [ 4491 | "Epoch: 30100, Loss: 0.00019823206821456552\n" 4492 | ] 4493 | }, 4494 | { 4495 | "name": "stderr", 4496 | "output_type": "stream", 4497 | "text": [ 4498 | " 8%|█████▌ | 30205/400000 [15:13<3:06:12, 33.10it/s]" 4499 | ] 4500 | }, 4501 | { 4502 | "name": "stdout", 4503 | "output_type": "stream", 4504 | "text": [ 4505 | "Epoch: 30200, Loss: 0.00019687775056809187\n" 4506 | ] 4507 | }, 4508 | { 4509 | "name": "stderr", 4510 | "output_type": "stream", 4511 | "text": [ 4512 | " 8%|█████▌ | 30305/400000 [15:16<3:01:35, 33.93it/s]" 4513 | ] 4514 | }, 4515 | { 4516 | "name": "stdout", 4517 | "output_type": "stream", 4518 | "text": [ 4519 | "Epoch: 30300, Loss: 0.00019549208809621632\n" 4520 | ] 4521 | }, 4522 | { 4523 | "name": "stderr", 4524 | "output_type": "stream", 4525 | "text": [ 4526 | " 8%|█████▌ | 
30405/400000 [15:19<3:02:59, 33.66it/s]" 4527 | ] 4528 | }, 4529 | { 4530 | "name": "stdout", 4531 | "output_type": "stream", 4532 | "text": [ 4533 | "Epoch: 30400, Loss: 0.0001942270464496687\n" 4534 | ] 4535 | }, 4536 | { 4537 | "name": "stderr", 4538 | "output_type": "stream", 4539 | "text": [ 4540 | " 8%|█████▌ | 30505/400000 [15:22<3:13:56, 31.75it/s]" 4541 | ] 4542 | }, 4543 | { 4544 | "name": "stdout", 4545 | "output_type": "stream", 4546 | "text": [ 4547 | "Epoch: 30500, Loss: 0.00019277987303212285\n" 4548 | ] 4549 | }, 4550 | { 4551 | "name": "stderr", 4552 | "output_type": "stream", 4553 | "text": [ 4554 | " 8%|█████▌ | 30605/400000 [15:25<3:08:04, 32.73it/s]" 4555 | ] 4556 | }, 4557 | { 4558 | "name": "stdout", 4559 | "output_type": "stream", 4560 | "text": [ 4561 | "Epoch: 30600, Loss: 0.00019139629148412496\n" 4562 | ] 4563 | }, 4564 | { 4565 | "name": "stderr", 4566 | "output_type": "stream", 4567 | "text": [ 4568 | " 8%|█████▌ | 30705/400000 [15:28<3:08:09, 32.71it/s]" 4569 | ] 4570 | }, 4571 | { 4572 | "name": "stdout", 4573 | "output_type": "stream", 4574 | "text": [ 4575 | "Epoch: 30700, Loss: 0.0001900623319670558\n" 4576 | ] 4577 | }, 4578 | { 4579 | "name": "stderr", 4580 | "output_type": "stream", 4581 | "text": [ 4582 | " 8%|█████▌ | 30805/400000 [15:31<3:02:43, 33.67it/s]" 4583 | ] 4584 | }, 4585 | { 4586 | "name": "stdout", 4587 | "output_type": "stream", 4588 | "text": [ 4589 | "Epoch: 30800, Loss: 0.00018865044694393873\n" 4590 | ] 4591 | }, 4592 | { 4593 | "name": "stderr", 4594 | "output_type": "stream", 4595 | "text": [ 4596 | " 8%|█████▋ | 30905/400000 [15:34<3:03:58, 33.44it/s]" 4597 | ] 4598 | }, 4599 | { 4600 | "name": "stdout", 4601 | "output_type": "stream", 4602 | "text": [ 4603 | "Epoch: 30900, Loss: 0.0001872217981144786\n" 4604 | ] 4605 | }, 4606 | { 4607 | "name": "stderr", 4608 | "output_type": "stream", 4609 | "text": [ 4610 | " 8%|█████▋ | 31005/400000 [15:37<3:07:14, 32.84it/s]" 4611 | ] 4612 | }, 4613 | { 4614 | "name": "stdout", 4615 | "output_type": "stream", 4616 | "text": [ 4617 | "Epoch: 31000, Loss: 0.00018577113223727793\n" 4618 | ] 4619 | }, 4620 | { 4621 | "name": "stderr", 4622 | "output_type": "stream", 4623 | "text": [ 4624 | " 8%|█████▋ | 31105/400000 [15:40<3:07:47, 32.74it/s]" 4625 | ] 4626 | }, 4627 | { 4628 | "name": "stdout", 4629 | "output_type": "stream", 4630 | "text": [ 4631 | "Epoch: 31100, Loss: 0.00018439634004607797\n" 4632 | ] 4633 | }, 4634 | { 4635 | "name": "stderr", 4636 | "output_type": "stream", 4637 | "text": [ 4638 | " 8%|█████▋ | 31205/400000 [15:43<3:04:55, 33.24it/s]" 4639 | ] 4640 | }, 4641 | { 4642 | "name": "stdout", 4643 | "output_type": "stream", 4644 | "text": [ 4645 | "Epoch: 31200, Loss: 0.00018314026237931103\n" 4646 | ] 4647 | }, 4648 | { 4649 | "name": "stderr", 4650 | "output_type": "stream", 4651 | "text": [ 4652 | " 8%|█████▋ | 31305/400000 [15:46<3:06:54, 32.88it/s]" 4653 | ] 4654 | }, 4655 | { 4656 | "name": "stdout", 4657 | "output_type": "stream", 4658 | "text": [ 4659 | "Epoch: 31300, Loss: 0.0001817329612094909\n" 4660 | ] 4661 | }, 4662 | { 4663 | "name": "stderr", 4664 | "output_type": "stream", 4665 | "text": [ 4666 | " 8%|█████▋ | 31405/400000 [15:49<3:02:53, 33.59it/s]" 4667 | ] 4668 | }, 4669 | { 4670 | "name": "stdout", 4671 | "output_type": "stream", 4672 | "text": [ 4673 | "Epoch: 31400, Loss: 0.00018042423471342772\n" 4674 | ] 4675 | }, 4676 | { 4677 | "name": "stderr", 4678 | "output_type": "stream", 4679 | "text": [ 4680 | " 8%|█████▋ | 31505/400000 [15:52<3:05:55, 
33.03it/s]" 4681 | ] 4682 | }, 4683 | { 4684 | "name": "stdout", 4685 | "output_type": "stream", 4686 | "text": [ 4687 | "Epoch: 31500, Loss: 0.00017902151739690453\n" 4688 | ] 4689 | }, 4690 | { 4691 | "name": "stderr", 4692 | "output_type": "stream", 4693 | "text": [ 4694 | " 8%|█████▊ | 31605/400000 [15:55<3:03:50, 33.40it/s]" 4695 | ] 4696 | }, 4697 | { 4698 | "name": "stdout", 4699 | "output_type": "stream", 4700 | "text": [ 4701 | "Epoch: 31600, Loss: 0.00017762188508640975\n" 4702 | ] 4703 | }, 4704 | { 4705 | "name": "stderr", 4706 | "output_type": "stream", 4707 | "text": [ 4708 | " 8%|█████▊ | 31705/400000 [15:58<3:01:37, 33.80it/s]" 4709 | ] 4710 | }, 4711 | { 4712 | "name": "stdout", 4713 | "output_type": "stream", 4714 | "text": [ 4715 | "Epoch: 31700, Loss: 0.00017619124264456332\n" 4716 | ] 4717 | }, 4718 | { 4719 | "name": "stderr", 4720 | "output_type": "stream", 4721 | "text": [ 4722 | " 8%|█████▊ | 31805/400000 [16:01<3:04:27, 33.27it/s]" 4723 | ] 4724 | }, 4725 | { 4726 | "name": "stdout", 4727 | "output_type": "stream", 4728 | "text": [ 4729 | "Epoch: 31800, Loss: 0.00017459104128647596\n" 4730 | ] 4731 | }, 4732 | { 4733 | "name": "stderr", 4734 | "output_type": "stream", 4735 | "text": [ 4736 | " 8%|█████▊ | 31905/400000 [16:04<3:01:35, 33.79it/s]" 4737 | ] 4738 | }, 4739 | { 4740 | "name": "stdout", 4741 | "output_type": "stream", 4742 | "text": [ 4743 | "Epoch: 31900, Loss: 0.00017302730702795088\n" 4744 | ] 4745 | }, 4746 | { 4747 | "name": "stderr", 4748 | "output_type": "stream", 4749 | "text": [ 4750 | " 8%|█████▊ | 32005/400000 [16:07<3:06:03, 32.96it/s]" 4751 | ] 4752 | }, 4753 | { 4754 | "name": "stdout", 4755 | "output_type": "stream", 4756 | "text": [ 4757 | "Epoch: 32000, Loss: 0.00017169228522107005\n" 4758 | ] 4759 | }, 4760 | { 4761 | "name": "stderr", 4762 | "output_type": "stream", 4763 | "text": [ 4764 | " 8%|█████▊ | 32105/400000 [16:10<3:07:04, 32.78it/s]" 4765 | ] 4766 | }, 4767 | { 4768 | "name": "stdout", 4769 | "output_type": "stream", 4770 | "text": [ 4771 | "Epoch: 32100, Loss: 0.0001706474577076733\n" 4772 | ] 4773 | }, 4774 | { 4775 | "name": "stderr", 4776 | "output_type": "stream", 4777 | "text": [ 4778 | " 8%|█████▉ | 32205/400000 [16:13<3:04:07, 33.29it/s]" 4779 | ] 4780 | }, 4781 | { 4782 | "name": "stdout", 4783 | "output_type": "stream", 4784 | "text": [ 4785 | "Epoch: 32200, Loss: 0.00016937952022999525\n" 4786 | ] 4787 | }, 4788 | { 4789 | "name": "stderr", 4790 | "output_type": "stream", 4791 | "text": [ 4792 | " 8%|█████▉ | 32305/400000 [16:16<3:02:13, 33.63it/s]" 4793 | ] 4794 | }, 4795 | { 4796 | "name": "stdout", 4797 | "output_type": "stream", 4798 | "text": [ 4799 | "Epoch: 32300, Loss: 0.0001682553847786039\n" 4800 | ] 4801 | }, 4802 | { 4803 | "name": "stderr", 4804 | "output_type": "stream", 4805 | "text": [ 4806 | " 8%|█████▉ | 32405/400000 [16:19<3:07:21, 32.70it/s]" 4807 | ] 4808 | }, 4809 | { 4810 | "name": "stdout", 4811 | "output_type": "stream", 4812 | "text": [ 4813 | "Epoch: 32400, Loss: 0.00016698215040378273\n" 4814 | ] 4815 | }, 4816 | { 4817 | "name": "stderr", 4818 | "output_type": "stream", 4819 | "text": [ 4820 | " 8%|█████▉ | 32505/400000 [16:22<3:09:16, 32.36it/s]" 4821 | ] 4822 | }, 4823 | { 4824 | "name": "stdout", 4825 | "output_type": "stream", 4826 | "text": [ 4827 | "Epoch: 32500, Loss: 0.0001658303663134575\n" 4828 | ] 4829 | }, 4830 | { 4831 | "name": "stderr", 4832 | "output_type": "stream", 4833 | "text": [ 4834 | " 8%|█████▉ | 32605/400000 [16:25<3:04:24, 33.20it/s]" 4835 | ] 4836 | }, 4837 | 
{ 4838 | "name": "stdout", 4839 | "output_type": "stream", 4840 | "text": [ 4841 | "Epoch: 32600, Loss: 0.0001645278389332816\n" 4842 | ] 4843 | }, 4844 | { 4845 | "name": "stderr", 4846 | "output_type": "stream", 4847 | "text": [ 4848 | " 8%|█████▉ | 32705/400000 [16:29<3:06:10, 32.88it/s]" 4849 | ] 4850 | }, 4851 | { 4852 | "name": "stdout", 4853 | "output_type": "stream", 4854 | "text": [ 4855 | "Epoch: 32700, Loss: 0.00016336525732185692\n" 4856 | ] 4857 | }, 4858 | { 4859 | "name": "stderr", 4860 | "output_type": "stream", 4861 | "text": [ 4862 | " 8%|█████▉ | 32805/400000 [16:32<3:04:57, 33.09it/s]" 4863 | ] 4864 | }, 4865 | { 4866 | "name": "stdout", 4867 | "output_type": "stream", 4868 | "text": [ 4869 | "Epoch: 32800, Loss: 0.00016219225653912872\n" 4870 | ] 4871 | }, 4872 | { 4873 | "name": "stderr", 4874 | "output_type": "stream", 4875 | "text": [ 4876 | " 8%|██████ | 32905/400000 [16:35<3:09:17, 32.32it/s]" 4877 | ] 4878 | }, 4879 | { 4880 | "name": "stdout", 4881 | "output_type": "stream", 4882 | "text": [ 4883 | "Epoch: 32900, Loss: 0.00016072948346845806\n" 4884 | ] 4885 | }, 4886 | { 4887 | "name": "stderr", 4888 | "output_type": "stream", 4889 | "text": [ 4890 | " 8%|██████ | 33005/400000 [16:38<3:04:00, 33.24it/s]" 4891 | ] 4892 | }, 4893 | { 4894 | "name": "stdout", 4895 | "output_type": "stream", 4896 | "text": [ 4897 | "Epoch: 33000, Loss: 0.00015960408200044185\n" 4898 | ] 4899 | }, 4900 | { 4901 | "name": "stderr", 4902 | "output_type": "stream", 4903 | "text": [ 4904 | " 8%|██████ | 33105/400000 [16:41<3:06:35, 32.77it/s]" 4905 | ] 4906 | }, 4907 | { 4908 | "name": "stdout", 4909 | "output_type": "stream", 4910 | "text": [ 4911 | "Epoch: 33100, Loss: 0.00015854106459300965\n" 4912 | ] 4913 | }, 4914 | { 4915 | "name": "stderr", 4916 | "output_type": "stream", 4917 | "text": [ 4918 | " 8%|██████ | 33205/400000 [16:44<3:02:53, 33.43it/s]" 4919 | ] 4920 | }, 4921 | { 4922 | "name": "stdout", 4923 | "output_type": "stream", 4924 | "text": [ 4925 | "Epoch: 33200, Loss: 0.00015752256149426103\n" 4926 | ] 4927 | }, 4928 | { 4929 | "name": "stderr", 4930 | "output_type": "stream", 4931 | "text": [ 4932 | " 8%|██████ | 33305/400000 [16:47<2:59:28, 34.05it/s]" 4933 | ] 4934 | }, 4935 | { 4936 | "name": "stdout", 4937 | "output_type": "stream", 4938 | "text": [ 4939 | "Epoch: 33300, Loss: 0.00015632528811693192\n" 4940 | ] 4941 | }, 4942 | { 4943 | "name": "stderr", 4944 | "output_type": "stream", 4945 | "text": [ 4946 | " 8%|██████ | 33405/400000 [16:50<3:05:22, 32.96it/s]" 4947 | ] 4948 | }, 4949 | { 4950 | "name": "stdout", 4951 | "output_type": "stream", 4952 | "text": [ 4953 | "Epoch: 33400, Loss: 0.00015545070345979184\n" 4954 | ] 4955 | }, 4956 | { 4957 | "name": "stderr", 4958 | "output_type": "stream", 4959 | "text": [ 4960 | " 8%|██████ | 33505/400000 [16:53<3:01:32, 33.65it/s]" 4961 | ] 4962 | }, 4963 | { 4964 | "name": "stdout", 4965 | "output_type": "stream", 4966 | "text": [ 4967 | "Epoch: 33500, Loss: 0.0001545538689242676\n" 4968 | ] 4969 | }, 4970 | { 4971 | "name": "stderr", 4972 | "output_type": "stream", 4973 | "text": [ 4974 | " 8%|██████▏ | 33605/400000 [16:56<3:07:37, 32.55it/s]" 4975 | ] 4976 | }, 4977 | { 4978 | "name": "stdout", 4979 | "output_type": "stream", 4980 | "text": [ 4981 | "Epoch: 33600, Loss: 0.00015361205441877246\n" 4982 | ] 4983 | }, 4984 | { 4985 | "name": "stderr", 4986 | "output_type": "stream", 4987 | "text": [ 4988 | " 8%|██████▏ | 33705/400000 [16:59<3:08:03, 32.46it/s]" 4989 | ] 4990 | }, 4991 | { 4992 | "name": "stdout", 4993 | 
"output_type": "stream", 4994 | "text": [ 4995 | "Epoch: 33700, Loss: 0.0001525732659501955\n" 4996 | ] 4997 | }, 4998 | { 4999 | "name": "stderr", 5000 | "output_type": "stream", 5001 | "text": [ 5002 | " 8%|██████▏ | 33805/400000 [17:02<3:17:27, 30.91it/s]" 5003 | ] 5004 | }, 5005 | { 5006 | "name": "stdout", 5007 | "output_type": "stream", 5008 | "text": [ 5009 | "Epoch: 33800, Loss: 0.0001515761687187478\n" 5010 | ] 5011 | }, 5012 | { 5013 | "name": "stderr", 5014 | "output_type": "stream", 5015 | "text": [ 5016 | " 8%|██████▏ | 33905/400000 [17:05<3:03:49, 33.19it/s]" 5017 | ] 5018 | }, 5019 | { 5020 | "name": "stdout", 5021 | "output_type": "stream", 5022 | "text": [ 5023 | "Epoch: 33900, Loss: 0.0001506053376942873\n" 5024 | ] 5025 | }, 5026 | { 5027 | "name": "stderr", 5028 | "output_type": "stream", 5029 | "text": [ 5030 | " 9%|██████▏ | 34005/400000 [17:08<3:03:56, 33.16it/s]" 5031 | ] 5032 | }, 5033 | { 5034 | "name": "stdout", 5035 | "output_type": "stream", 5036 | "text": [ 5037 | "Epoch: 34000, Loss: 0.00014945986913517118\n" 5038 | ] 5039 | }, 5040 | { 5041 | "name": "stderr", 5042 | "output_type": "stream", 5043 | "text": [ 5044 | " 9%|██████▏ | 34105/400000 [17:11<3:01:28, 33.60it/s]" 5045 | ] 5046 | }, 5047 | { 5048 | "name": "stdout", 5049 | "output_type": "stream", 5050 | "text": [ 5051 | "Epoch: 34100, Loss: 0.00014843250392004848\n" 5052 | ] 5053 | }, 5054 | { 5055 | "name": "stderr", 5056 | "output_type": "stream", 5057 | "text": [ 5058 | " 9%|██████▏ | 34205/400000 [17:14<2:57:04, 34.43it/s]" 5059 | ] 5060 | }, 5061 | { 5062 | "name": "stdout", 5063 | "output_type": "stream", 5064 | "text": [ 5065 | "Epoch: 34200, Loss: 0.00014748048852197826\n" 5066 | ] 5067 | }, 5068 | { 5069 | "name": "stderr", 5070 | "output_type": "stream", 5071 | "text": [ 5072 | " 9%|██████▎ | 34305/400000 [17:17<2:57:35, 34.32it/s]" 5073 | ] 5074 | }, 5075 | { 5076 | "name": "stdout", 5077 | "output_type": "stream", 5078 | "text": [ 5079 | "Epoch: 34300, Loss: 0.0001466141256969422\n" 5080 | ] 5081 | }, 5082 | { 5083 | "name": "stderr", 5084 | "output_type": "stream", 5085 | "text": [ 5086 | " 9%|██████▎ | 34405/400000 [17:20<2:58:53, 34.06it/s]" 5087 | ] 5088 | }, 5089 | { 5090 | "name": "stdout", 5091 | "output_type": "stream", 5092 | "text": [ 5093 | "Epoch: 34400, Loss: 0.0001457605540053919\n" 5094 | ] 5095 | }, 5096 | { 5097 | "name": "stderr", 5098 | "output_type": "stream", 5099 | "text": [ 5100 | " 9%|██████▎ | 34505/400000 [17:23<2:56:41, 34.48it/s]" 5101 | ] 5102 | }, 5103 | { 5104 | "name": "stdout", 5105 | "output_type": "stream", 5106 | "text": [ 5107 | "Epoch: 34500, Loss: 0.00014484694111160934\n" 5108 | ] 5109 | }, 5110 | { 5111 | "name": "stderr", 5112 | "output_type": "stream", 5113 | "text": [ 5114 | " 9%|██████▎ | 34605/400000 [17:26<2:58:31, 34.11it/s]" 5115 | ] 5116 | }, 5117 | { 5118 | "name": "stdout", 5119 | "output_type": "stream", 5120 | "text": [ 5121 | "Epoch: 34600, Loss: 0.0001437831815565005\n" 5122 | ] 5123 | }, 5124 | { 5125 | "name": "stderr", 5126 | "output_type": "stream", 5127 | "text": [ 5128 | " 9%|██████▎ | 34705/400000 [17:28<3:03:22, 33.20it/s]" 5129 | ] 5130 | }, 5131 | { 5132 | "name": "stdout", 5133 | "output_type": "stream", 5134 | "text": [ 5135 | "Epoch: 34700, Loss: 0.00014277265290729702\n" 5136 | ] 5137 | }, 5138 | { 5139 | "name": "stderr", 5140 | "output_type": "stream", 5141 | "text": [ 5142 | " 9%|██████▎ | 34805/400000 [17:31<2:59:46, 33.86it/s]" 5143 | ] 5144 | }, 5145 | { 5146 | "name": "stdout", 5147 | "output_type": "stream", 5148 
| "text": [ 5149 | "Epoch: 34800, Loss: 0.00014170668146107346\n" 5150 | ] 5151 | }, 5152 | { 5153 | "name": "stderr", 5154 | "output_type": "stream", 5155 | "text": [ 5156 | " 9%|██████▎ | 34905/400000 [17:34<3:05:36, 32.78it/s]" 5157 | ] 5158 | }, 5159 | { 5160 | "name": "stdout", 5161 | "output_type": "stream", 5162 | "text": [ 5163 | "Epoch: 34900, Loss: 0.0001406351657351479\n" 5164 | ] 5165 | }, 5166 | { 5167 | "name": "stderr", 5168 | "output_type": "stream", 5169 | "text": [ 5170 | " 9%|██████▍ | 35005/400000 [17:37<3:01:06, 33.59it/s]" 5171 | ] 5172 | }, 5173 | { 5174 | "name": "stdout", 5175 | "output_type": "stream", 5176 | "text": [ 5177 | "Epoch: 35000, Loss: 0.00013957229384686798\n" 5178 | ] 5179 | }, 5180 | { 5181 | "name": "stderr", 5182 | "output_type": "stream", 5183 | "text": [ 5184 | " 9%|██████▍ | 35105/400000 [17:40<2:59:20, 33.91it/s]" 5185 | ] 5186 | }, 5187 | { 5188 | "name": "stdout", 5189 | "output_type": "stream", 5190 | "text": [ 5191 | "Epoch: 35100, Loss: 0.00013857994053978473\n" 5192 | ] 5193 | }, 5194 | { 5195 | "name": "stderr", 5196 | "output_type": "stream", 5197 | "text": [ 5198 | " 9%|██████▍ | 35205/400000 [17:43<3:00:34, 33.67it/s]" 5199 | ] 5200 | }, 5201 | { 5202 | "name": "stdout", 5203 | "output_type": "stream", 5204 | "text": [ 5205 | "Epoch: 35200, Loss: 0.0001377523149130866\n" 5206 | ] 5207 | }, 5208 | { 5209 | "name": "stderr", 5210 | "output_type": "stream", 5211 | "text": [ 5212 | " 9%|██████▍ | 35305/400000 [17:46<2:58:59, 33.96it/s]" 5213 | ] 5214 | }, 5215 | { 5216 | "name": "stdout", 5217 | "output_type": "stream", 5218 | "text": [ 5219 | "Epoch: 35300, Loss: 0.00013677298557013273\n" 5220 | ] 5221 | }, 5222 | { 5223 | "name": "stderr", 5224 | "output_type": "stream", 5225 | "text": [ 5226 | " 9%|██████▍ | 35405/400000 [17:49<3:00:33, 33.65it/s]" 5227 | ] 5228 | }, 5229 | { 5230 | "name": "stdout", 5231 | "output_type": "stream", 5232 | "text": [ 5233 | "Epoch: 35400, Loss: 0.00013575910998042673\n" 5234 | ] 5235 | }, 5236 | { 5237 | "name": "stderr", 5238 | "output_type": "stream", 5239 | "text": [ 5240 | " 9%|██████▍ | 35505/400000 [17:52<2:57:55, 34.14it/s]" 5241 | ] 5242 | }, 5243 | { 5244 | "name": "stdout", 5245 | "output_type": "stream", 5246 | "text": [ 5247 | "Epoch: 35500, Loss: 0.00013476831372827291\n" 5248 | ] 5249 | }, 5250 | { 5251 | "name": "stderr", 5252 | "output_type": "stream", 5253 | "text": [ 5254 | " 9%|██████▍ | 35605/400000 [17:55<2:58:37, 34.00it/s]" 5255 | ] 5256 | }, 5257 | { 5258 | "name": "stdout", 5259 | "output_type": "stream", 5260 | "text": [ 5261 | "Epoch: 35600, Loss: 0.00013375146954786032\n" 5262 | ] 5263 | }, 5264 | { 5265 | "name": "stderr", 5266 | "output_type": "stream", 5267 | "text": [ 5268 | " 9%|██████▌ | 35705/400000 [17:58<2:58:42, 33.97it/s]" 5269 | ] 5270 | }, 5271 | { 5272 | "name": "stdout", 5273 | "output_type": "stream", 5274 | "text": [ 5275 | "Epoch: 35700, Loss: 0.0001328407961409539\n" 5276 | ] 5277 | }, 5278 | { 5279 | "name": "stderr", 5280 | "output_type": "stream", 5281 | "text": [ 5282 | " 9%|██████▌ | 35805/400000 [18:01<2:57:38, 34.17it/s]" 5283 | ] 5284 | }, 5285 | { 5286 | "name": "stdout", 5287 | "output_type": "stream", 5288 | "text": [ 5289 | "Epoch: 35800, Loss: 0.00013203393609728664\n" 5290 | ] 5291 | }, 5292 | { 5293 | "name": "stderr", 5294 | "output_type": "stream", 5295 | "text": [ 5296 | " 9%|██████▌ | 35905/400000 [18:04<2:59:31, 33.80it/s]" 5297 | ] 5298 | }, 5299 | { 5300 | "name": "stdout", 5301 | "output_type": "stream", 5302 | "text": [ 5303 | "Epoch: 
35900, Loss: 0.00013105761900078505\n" 5304 | ] 5305 | }, 5306 | { 5307 | "name": "stderr", 5308 | "output_type": "stream", 5309 | "text": [ 5310 | " 9%|██████▌ | 36005/400000 [18:07<2:59:40, 33.76it/s]" 5311 | ] 5312 | }, 5313 | { 5314 | "name": "stdout", 5315 | "output_type": "stream", 5316 | "text": [ 5317 | "Epoch: 36000, Loss: 0.00013021714403294027\n" 5318 | ] 5319 | }, 5320 | { 5321 | "name": "stderr", 5322 | "output_type": "stream", 5323 | "text": [ 5324 | " 9%|██████▌ | 36105/400000 [18:10<2:58:30, 33.98it/s]" 5325 | ] 5326 | }, 5327 | { 5328 | "name": "stdout", 5329 | "output_type": "stream", 5330 | "text": [ 5331 | "Epoch: 36100, Loss: 0.00012939056614413857\n" 5332 | ] 5333 | }, 5334 | { 5335 | "name": "stderr", 5336 | "output_type": "stream", 5337 | "text": [ 5338 | " 9%|██████▌ | 36205/400000 [18:13<3:01:07, 33.48it/s]" 5339 | ] 5340 | }, 5341 | { 5342 | "name": "stdout", 5343 | "output_type": "stream", 5344 | "text": [ 5345 | "Epoch: 36200, Loss: 0.0001283903984585777\n" 5346 | ] 5347 | }, 5348 | { 5349 | "name": "stderr", 5350 | "output_type": "stream", 5351 | "text": [ 5352 | " 9%|██████▋ | 36305/400000 [18:16<2:57:47, 34.10it/s]" 5353 | ] 5354 | }, 5355 | { 5356 | "name": "stdout", 5357 | "output_type": "stream", 5358 | "text": [ 5359 | "Epoch: 36300, Loss: 0.00012743138358928263\n" 5360 | ] 5361 | }, 5362 | { 5363 | "name": "stderr", 5364 | "output_type": "stream", 5365 | "text": [ 5366 | " 9%|██████▋ | 36405/400000 [18:19<3:00:19, 33.61it/s]" 5367 | ] 5368 | }, 5369 | { 5370 | "name": "stdout", 5371 | "output_type": "stream", 5372 | "text": [ 5373 | "Epoch: 36400, Loss: 0.0001266113540623337\n" 5374 | ] 5375 | }, 5376 | { 5377 | "name": "stderr", 5378 | "output_type": "stream", 5379 | "text": [ 5380 | " 9%|██████▋ | 36505/400000 [18:22<3:04:57, 32.75it/s]" 5381 | ] 5382 | }, 5383 | { 5384 | "name": "stdout", 5385 | "output_type": "stream", 5386 | "text": [ 5387 | "Epoch: 36500, Loss: 0.00012590088590513915\n" 5388 | ] 5389 | }, 5390 | { 5391 | "name": "stderr", 5392 | "output_type": "stream", 5393 | "text": [ 5394 | " 9%|██████▋ | 36605/400000 [18:25<3:00:09, 33.62it/s]" 5395 | ] 5396 | }, 5397 | { 5398 | "name": "stdout", 5399 | "output_type": "stream", 5400 | "text": [ 5401 | "Epoch: 36600, Loss: 0.00012504182814154774\n" 5402 | ] 5403 | }, 5404 | { 5405 | "name": "stderr", 5406 | "output_type": "stream", 5407 | "text": [ 5408 | " 9%|██████▋ | 36705/400000 [18:28<2:57:21, 34.14it/s]" 5409 | ] 5410 | }, 5411 | { 5412 | "name": "stdout", 5413 | "output_type": "stream", 5414 | "text": [ 5415 | "Epoch: 36700, Loss: 0.00012423808220773935\n" 5416 | ] 5417 | }, 5418 | { 5419 | "name": "stderr", 5420 | "output_type": "stream", 5421 | "text": [ 5422 | " 9%|██████▋ | 36805/400000 [18:31<2:59:01, 33.81it/s]" 5423 | ] 5424 | }, 5425 | { 5426 | "name": "stdout", 5427 | "output_type": "stream", 5428 | "text": [ 5429 | "Epoch: 36800, Loss: 0.0001235220697708428\n" 5430 | ] 5431 | }, 5432 | { 5433 | "name": "stderr", 5434 | "output_type": "stream", 5435 | "text": [ 5436 | " 9%|██████▋ | 36905/400000 [18:34<3:02:57, 33.08it/s]" 5437 | ] 5438 | }, 5439 | { 5440 | "name": "stdout", 5441 | "output_type": "stream", 5442 | "text": [ 5443 | "Epoch: 36900, Loss: 0.00012283690739423037\n" 5444 | ] 5445 | }, 5446 | { 5447 | "name": "stderr", 5448 | "output_type": "stream", 5449 | "text": [ 5450 | " 9%|██████▊ | 37005/400000 [18:37<3:00:49, 33.46it/s]" 5451 | ] 5452 | }, 5453 | { 5454 | "name": "stdout", 5455 | "output_type": "stream", 5456 | "text": [ 5457 | "Epoch: 37000, Loss: 
0.00012209525448270142\n" 5458 | ] 5459 | }, 5460 | { 5461 | "name": "stderr", 5462 | "output_type": "stream", 5463 | "text": [ 5464 | " 9%|██████▊ | 37105/400000 [18:40<3:01:23, 33.34it/s]" 5465 | ] 5466 | }, 5467 | { 5468 | "name": "stdout", 5469 | "output_type": "stream", 5470 | "text": [ 5471 | "Epoch: 37100, Loss: 0.00012132574920542538\n" 5472 | ] 5473 | }, 5474 | { 5475 | "name": "stderr", 5476 | "output_type": "stream", 5477 | "text": [ 5478 | " 9%|██████▊ | 37205/400000 [18:43<3:02:11, 33.19it/s]" 5479 | ] 5480 | }, 5481 | { 5482 | "name": "stdout", 5483 | "output_type": "stream", 5484 | "text": [ 5485 | "Epoch: 37200, Loss: 0.00012071585661033168\n" 5486 | ] 5487 | }, 5488 | { 5489 | "name": "stderr", 5490 | "output_type": "stream", 5491 | "text": [ 5492 | " 9%|██████▊ | 37305/400000 [18:46<2:59:31, 33.67it/s]" 5493 | ] 5494 | }, 5495 | { 5496 | "name": "stdout", 5497 | "output_type": "stream", 5498 | "text": [ 5499 | "Epoch: 37300, Loss: 0.00012001744471490383\n" 5500 | ] 5501 | }, 5502 | { 5503 | "name": "stderr", 5504 | "output_type": "stream", 5505 | "text": [ 5506 | " 9%|██████▊ | 37405/400000 [18:49<3:02:10, 33.17it/s]" 5507 | ] 5508 | }, 5509 | { 5510 | "name": "stdout", 5511 | "output_type": "stream", 5512 | "text": [ 5513 | "Epoch: 37400, Loss: 0.00011933683708775789\n" 5514 | ] 5515 | }, 5516 | { 5517 | "name": "stderr", 5518 | "output_type": "stream", 5519 | "text": [ 5520 | " 9%|██████▊ | 37505/400000 [18:52<2:59:46, 33.61it/s]" 5521 | ] 5522 | }, 5523 | { 5524 | "name": "stdout", 5525 | "output_type": "stream", 5526 | "text": [ 5527 | "Epoch: 37500, Loss: 0.00011857919162139297\n" 5528 | ] 5529 | }, 5530 | { 5531 | "name": "stderr", 5532 | "output_type": "stream", 5533 | "text": [ 5534 | " 9%|██████▊ | 37605/400000 [18:55<3:01:24, 33.29it/s]" 5535 | ] 5536 | }, 5537 | { 5538 | "name": "stdout", 5539 | "output_type": "stream", 5540 | "text": [ 5541 | "Epoch: 37600, Loss: 0.00011789163545472547\n" 5542 | ] 5543 | }, 5544 | { 5545 | "name": "stderr", 5546 | "output_type": "stream", 5547 | "text": [ 5548 | " 9%|██████▉ | 37705/400000 [18:58<3:01:18, 33.30it/s]" 5549 | ] 5550 | }, 5551 | { 5552 | "name": "stdout", 5553 | "output_type": "stream", 5554 | "text": [ 5555 | "Epoch: 37700, Loss: 0.00011716452718246728\n" 5556 | ] 5557 | }, 5558 | { 5559 | "name": "stderr", 5560 | "output_type": "stream", 5561 | "text": [ 5562 | " 9%|██████▉ | 37805/400000 [19:01<3:03:32, 32.89it/s]" 5563 | ] 5564 | }, 5565 | { 5566 | "name": "stdout", 5567 | "output_type": "stream", 5568 | "text": [ 5569 | "Epoch: 37800, Loss: 0.0001163617562269792\n" 5570 | ] 5571 | }, 5572 | { 5573 | "name": "stderr", 5574 | "output_type": "stream", 5575 | "text": [ 5576 | " 9%|██████▉ | 37905/400000 [19:04<3:00:56, 33.35it/s]" 5577 | ] 5578 | }, 5579 | { 5580 | "name": "stdout", 5581 | "output_type": "stream", 5582 | "text": [ 5583 | "Epoch: 37900, Loss: 0.00011568082118174061\n" 5584 | ] 5585 | }, 5586 | { 5587 | "name": "stderr", 5588 | "output_type": "stream", 5589 | "text": [ 5590 | " 10%|██████▉ | 38005/400000 [19:07<3:02:04, 33.13it/s]" 5591 | ] 5592 | }, 5593 | { 5594 | "name": "stdout", 5595 | "output_type": "stream", 5596 | "text": [ 5597 | "Epoch: 38000, Loss: 0.00011497325613163412\n" 5598 | ] 5599 | }, 5600 | { 5601 | "name": "stderr", 5602 | "output_type": "stream", 5603 | "text": [ 5604 | " 10%|██████▉ | 38105/400000 [19:10<3:02:38, 33.02it/s]" 5605 | ] 5606 | }, 5607 | { 5608 | "name": "stdout", 5609 | "output_type": "stream", 5610 | "text": [ 5611 | "Epoch: 38100, Loss: 
0.00011417432688176632\n" 5612 | ] 5613 | }, 5614 | { 5615 | "name": "stderr", 5616 | "output_type": "stream", 5617 | "text": [ 5618 | " 10%|██████▉ | 38205/400000 [19:13<2:58:37, 33.76it/s]" 5619 | ] 5620 | }, 5621 | { 5622 | "name": "stdout", 5623 | "output_type": "stream", 5624 | "text": [ 5625 | "Epoch: 38200, Loss: 0.0001135203056037426\n" 5626 | ] 5627 | }, 5628 | { 5629 | "name": "stderr", 5630 | "output_type": "stream", 5631 | "text": [ 5632 | " 10%|██████▉ | 38305/400000 [19:16<3:08:45, 31.94it/s]" 5633 | ] 5634 | }, 5635 | { 5636 | "name": "stdout", 5637 | "output_type": "stream", 5638 | "text": [ 5639 | "Epoch: 38300, Loss: 0.0001128232543123886\n" 5640 | ] 5641 | }, 5642 | { 5643 | "name": "stderr", 5644 | "output_type": "stream", 5645 | "text": [ 5646 | " 10%|███████ | 38405/400000 [19:19<3:00:50, 33.33it/s]" 5647 | ] 5648 | }, 5649 | { 5650 | "name": "stdout", 5651 | "output_type": "stream", 5652 | "text": [ 5653 | "Epoch: 38400, Loss: 0.0001121823224821128\n" 5654 | ] 5655 | }, 5656 | { 5657 | "name": "stderr", 5658 | "output_type": "stream", 5659 | "text": [ 5660 | " 10%|███████ | 38505/400000 [19:22<3:03:49, 32.77it/s]" 5661 | ] 5662 | }, 5663 | { 5664 | "name": "stdout", 5665 | "output_type": "stream", 5666 | "text": [ 5667 | "Epoch: 38500, Loss: 0.000111468143586535\n" 5668 | ] 5669 | }, 5670 | { 5671 | "name": "stderr", 5672 | "output_type": "stream", 5673 | "text": [ 5674 | " 10%|███████ | 38605/400000 [19:25<3:04:55, 32.57it/s]" 5675 | ] 5676 | }, 5677 | { 5678 | "name": "stdout", 5679 | "output_type": "stream", 5680 | "text": [ 5681 | "Epoch: 38600, Loss: 0.00011082083074143156\n" 5682 | ] 5683 | }, 5684 | { 5685 | "name": "stderr", 5686 | "output_type": "stream", 5687 | "text": [ 5688 | " 10%|███████ | 38707/400000 [19:28<3:22:14, 29.77it/s]" 5689 | ] 5690 | }, 5691 | { 5692 | "name": "stdout", 5693 | "output_type": "stream", 5694 | "text": [ 5695 | "Epoch: 38700, Loss: 0.0001102270616684109\n" 5696 | ] 5697 | }, 5698 | { 5699 | "name": "stderr", 5700 | "output_type": "stream", 5701 | "text": [ 5702 | " 10%|███████ | 38807/400000 [19:31<3:14:09, 31.00it/s]" 5703 | ] 5704 | }, 5705 | { 5706 | "name": "stdout", 5707 | "output_type": "stream", 5708 | "text": [ 5709 | "Epoch: 38800, Loss: 0.00010965800902340561\n" 5710 | ] 5711 | }, 5712 | { 5713 | "name": "stderr", 5714 | "output_type": "stream", 5715 | "text": [ 5716 | " 10%|███████ | 38904/400000 [19:35<3:26:06, 29.20it/s]" 5717 | ] 5718 | }, 5719 | { 5720 | "name": "stdout", 5721 | "output_type": "stream", 5722 | "text": [ 5723 | "Epoch: 38900, Loss: 0.000109059750684537\n" 5724 | ] 5725 | }, 5726 | { 5727 | "name": "stderr", 5728 | "output_type": "stream", 5729 | "text": [ 5730 | " 10%|███████ | 39004/400000 [19:38<3:14:35, 30.92it/s]" 5731 | ] 5732 | }, 5733 | { 5734 | "name": "stdout", 5735 | "output_type": "stream", 5736 | "text": [ 5737 | "Epoch: 39000, Loss: 0.00010853647836484015\n" 5738 | ] 5739 | }, 5740 | { 5741 | "name": "stderr", 5742 | "output_type": "stream", 5743 | "text": [ 5744 | " 10%|███████▏ | 39106/400000 [19:42<3:16:36, 30.59it/s]" 5745 | ] 5746 | }, 5747 | { 5748 | "name": "stdout", 5749 | "output_type": "stream", 5750 | "text": [ 5751 | "Epoch: 39100, Loss: 0.00010789328371174634\n" 5752 | ] 5753 | }, 5754 | { 5755 | "name": "stderr", 5756 | "output_type": "stream", 5757 | "text": [ 5758 | " 10%|███████▏ | 39206/400000 [19:45<3:33:57, 28.10it/s]" 5759 | ] 5760 | }, 5761 | { 5762 | "name": "stdout", 5763 | "output_type": "stream", 5764 | "text": [ 5765 | "Epoch: 39200, Loss: 
0.0001072462837328203\n" 5766 | ] 5767 | }, 5768 | { 5769 | "name": "stderr", 5770 | "output_type": "stream", 5771 | "text": [ 5772 | " 10%|███████▏ | 39306/400000 [19:49<3:24:58, 29.33it/s]" 5773 | ] 5774 | }, 5775 | { 5776 | "name": "stdout", 5777 | "output_type": "stream", 5778 | "text": [ 5779 | "Epoch: 39300, Loss: 0.00010663046123227105\n" 5780 | ] 5781 | }, 5782 | { 5783 | "name": "stderr", 5784 | "output_type": "stream", 5785 | "text": [ 5786 | " 10%|███████▏ | 39406/400000 [19:52<2:58:17, 33.71it/s]" 5787 | ] 5788 | }, 5789 | { 5790 | "name": "stdout", 5791 | "output_type": "stream", 5792 | "text": [ 5793 | "Epoch: 39400, Loss: 0.00010609682067297399\n" 5794 | ] 5795 | }, 5796 | { 5797 | "name": "stderr", 5798 | "output_type": "stream", 5799 | "text": [ 5800 | " 10%|███████▏ | 39506/400000 [19:55<3:03:12, 32.79it/s]" 5801 | ] 5802 | }, 5803 | { 5804 | "name": "stdout", 5805 | "output_type": "stream", 5806 | "text": [ 5807 | "Epoch: 39500, Loss: 0.00010558182111708447\n" 5808 | ] 5809 | }, 5810 | { 5811 | "name": "stderr", 5812 | "output_type": "stream", 5813 | "text": [ 5814 | " 10%|███████▏ | 39606/400000 [19:58<2:59:49, 33.40it/s]" 5815 | ] 5816 | }, 5817 | { 5818 | "name": "stdout", 5819 | "output_type": "stream", 5820 | "text": [ 5821 | "Epoch: 39600, Loss: 0.00010506627586437389\n" 5822 | ] 5823 | }, 5824 | { 5825 | "name": "stderr", 5826 | "output_type": "stream", 5827 | "text": [ 5828 | " 10%|███████▏ | 39706/400000 [20:01<2:57:45, 33.78it/s]" 5829 | ] 5830 | }, 5831 | { 5832 | "name": "stdout", 5833 | "output_type": "stream", 5834 | "text": [ 5835 | "Epoch: 39700, Loss: 0.00010456699965288863\n" 5836 | ] 5837 | }, 5838 | { 5839 | "name": "stderr", 5840 | "output_type": "stream", 5841 | "text": [ 5842 | " 10%|███████▎ | 39806/400000 [20:04<2:56:33, 34.00it/s]" 5843 | ] 5844 | }, 5845 | { 5846 | "name": "stdout", 5847 | "output_type": "stream", 5848 | "text": [ 5849 | "Epoch: 39800, Loss: 0.00010401687177363783\n" 5850 | ] 5851 | }, 5852 | { 5853 | "name": "stderr", 5854 | "output_type": "stream", 5855 | "text": [ 5856 | " 10%|███████▎ | 39906/400000 [20:07<3:07:36, 31.99it/s]" 5857 | ] 5858 | }, 5859 | { 5860 | "name": "stdout", 5861 | "output_type": "stream", 5862 | "text": [ 5863 | "Epoch: 39900, Loss: 0.00010353805555496365\n" 5864 | ] 5865 | }, 5866 | { 5867 | "name": "stderr", 5868 | "output_type": "stream", 5869 | "text": [ 5870 | " 10%|███████▎ | 40006/400000 [20:10<3:03:14, 32.74it/s]" 5871 | ] 5872 | }, 5873 | { 5874 | "name": "stdout", 5875 | "output_type": "stream", 5876 | "text": [ 5877 | "Epoch: 40000, Loss: 0.00010301281145075336\n" 5878 | ] 5879 | }, 5880 | { 5881 | "name": "stderr", 5882 | "output_type": "stream", 5883 | "text": [ 5884 | " 10%|███████▎ | 40106/400000 [20:13<3:00:58, 33.14it/s]" 5885 | ] 5886 | }, 5887 | { 5888 | "name": "stdout", 5889 | "output_type": "stream", 5890 | "text": [ 5891 | "Epoch: 40100, Loss: 0.00010240800475003198\n" 5892 | ] 5893 | }, 5894 | { 5895 | "name": "stderr", 5896 | "output_type": "stream", 5897 | "text": [ 5898 | " 10%|███████▎ | 40206/400000 [20:16<2:58:58, 33.50it/s]" 5899 | ] 5900 | }, 5901 | { 5902 | "name": "stdout", 5903 | "output_type": "stream", 5904 | "text": [ 5905 | "Epoch: 40200, Loss: 0.00010184713028138503\n" 5906 | ] 5907 | }, 5908 | { 5909 | "name": "stderr", 5910 | "output_type": "stream", 5911 | "text": [ 5912 | " 10%|███████▎ | 40306/400000 [20:19<3:05:29, 32.32it/s]" 5913 | ] 5914 | }, 5915 | { 5916 | "name": "stdout", 5917 | "output_type": "stream", 5918 | "text": [ 5919 | "Epoch: 40300, Loss: 
[the log continues to epoch 40,500 (Loss ≈ 1.00e-4); the run shown stops at iteration 40,554/400,000 after 20:26]
     ]
    }
   ],
   "source": [
    "# Second phase of training, now with the complete set of boundary conditions\n",
    "LR = 1\n",
    "Epochs = 400000\n",
    "xy_col, xy_bnd, uv_bnd, xy_outlet = prepare_data(N_c=50000)  # prepare the domain (collocation) points on which the PDE residual is evaluated\n",
    "trainer_ = PINNManager(last_model.to(DEVICE), LR, 'Navier_Stokes')\n",
    "total_loss, model_trained_ = trainer_.lbfgs_optimizer(Epochs, xy_col, xy_bnd, uv_bnd, xy_outlet)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
--------------------------------------------------------------------------------
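Note on the second-phase call above: the code below is a minimal, hedged sketch of how such an L-BFGS loop over a trained PINN can be wired up with PyTorch's built-in optimizer. It is not the repository's PINNManager implementation; `pde_residual` and `boundary_loss` are hypothetical helpers standing in for the Navier-Stokes residual and boundary-condition terms, and the logging cadence simply mirrors the output shown in the notebook.

    # Sketch only (assumptions): `model` is a PINN_Net from model.py;
    # `pde_residual(model, xy_col)` and `boundary_loss(model, xy_bnd, uv_bnd)`
    # are hypothetical stand-ins for the domain and boundary loss terms.
    import torch

    def lbfgs_second_phase(model, xy_col, xy_bnd, uv_bnd,
                           epochs=400000, lr=1.0, log_every=100):
        # One quasi-Newton update per "epoch", matching LR = 1 in the notebook cell.
        optimizer = torch.optim.LBFGS(model.parameters(), lr=lr, max_iter=1,
                                      history_size=50, line_search_fn='strong_wolfe')

        def closure():
            optimizer.zero_grad()
            loss = pde_residual(model, xy_col) + boundary_loss(model, xy_bnd, uv_bnd)
            loss.backward()
            return loss

        for epoch in range(epochs):
            loss = optimizer.step(closure)  # returns the closure's loss value
            if epoch % log_every == 0:
                print(f"Epoch: {epoch}, Loss: {loss.item()}")
        return loss, model

Setting `max_iter=1` keeps one optimizer step per loop iteration so progress can be logged every 100 epochs, as in the recorded output; a larger `max_iter` with fewer outer iterations is an equally valid way to drive L-BFGS.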