import torch
import torch.nn as nn
import torch.optim as optim

from itertools import chain

# Parts of the code are modifications of Pytorch's AdamW optimizer
# Parts of the code are modifications of code from https://github.com/jiaweizzhao/GaLore/blob/master/galore_torch/galore_projector.py


class SOAP(optim.Optimizer):
    """
    Implements SOAP algorithm (https://arxiv.org/abs/2409.11321).

    Parameters:
        params (`Iterable[nn.parameter.Parameter]`):
            Iterable of parameters to optimize or dictionaries defining parameter groups.
        lr (`float`, *optional*, defaults to 0.003):
            The learning rate to use.
        betas (`Tuple[float,float]`, *optional*, defaults to `(0.95, 0.95)`):
            Adam's betas parameters (b1, b2).
        shampoo_beta (`float`, *optional*, defaults to -1):
            If >= 0, use this beta for the preconditioner (L and R in paper, state['GG'] below) moving average instead of betas[1].
        eps (`float`, *optional*, defaults to 1e-08):
            Adam's epsilon for numerical stability.
        weight_decay (`float`, *optional*, defaults to 0.01): weight decay coefficient.
        precondition_frequency (`int`, *optional*, defaults to 10):
            How often to update the preconditioner.
        max_precond_dim (`int`, *optional*, defaults to 10000):
            Maximum dimension of the preconditioner.
            Set to 10000, so that we exclude most common vocab sizes while including layers.
        merge_dims (`bool`, *optional*, defaults to `False`):
            Whether or not to merge dimensions of the preconditioner.
        precondition_1d (`bool`, *optional*, defaults to `False`):
            Whether or not to precondition 1D gradients.
        normalize_grads (`bool`, *optional*, defaults to `False`):
            Whether or not to normalize gradients per layer.
            Helps at large precondition_frequency (~100 in our experiments),
            but hurts performance at small precondition_frequency (~10 in our experiments).
        data_format (`str`, *optional*, defaults to `channels_first`):
            Data format of the input for convolutional layers.
            Should be "channels_last" for data_format of NHWC and "channels_first" for NCHW.
        correct_bias (`bool`, *optional*, defaults to `True`):
            Whether or not to use bias correction in Adam.
    """

    def __init__(
        self,
        params,
        lr: float = 3e-3,
        betas=(0.95, 0.95),
        shampoo_beta: float = -1,
        eps: float = 1e-8,
        weight_decay: float = 0.01,
        precondition_frequency: int = 10,
        max_precond_dim: int = 10000,
        merge_dims: bool = False,  # Merge dimensions till the product of the dimensions is less than or equal to max_precond_dim.
        precondition_1d: bool = False,
        normalize_grads: bool = False,
        data_format: str = "channels_first",
        correct_bias: bool = True,
    ):
        defaults = {
            "lr": lr,
            "betas": betas,
            "shampoo_beta": shampoo_beta,
            "eps": eps,
            "weight_decay": weight_decay,
            "precondition_frequency": precondition_frequency,
            "max_precond_dim": max_precond_dim,
            "merge_dims": merge_dims,
            "precondition_1d": precondition_1d,
            "normalize_grads": normalize_grads,
            "correct_bias": correct_bias,
        }
        super().__init__(params, defaults)
        # data_format is optimizer-wide (not per parameter group).
        self._data_format = data_format

    def merge_dims(self, grad, max_precond_dim):
        """
        Merges dimensions of the gradient tensor till the product of the dimensions is less than or equal to max_precond_dim.

        Returns `grad` reshaped to the merged shape. A 4D tensor is first permuted with
        (0, 3, 1, 2) when the optimizer was built with data_format="channels_last".
        """
        assert self._data_format in ["channels_first", "channels_last"]
        if self._data_format == "channels_last" and grad.dim() == 4:
            grad = grad.permute(0, 3, 1, 2)
        shape = grad.shape
        new_shape = []

        curr_shape = 1
        for sh in shape:
            temp_shape = curr_shape * sh
            if temp_shape > max_precond_dim:
                if curr_shape > 1:
                    # Close the running group and start a new one at this dim.
                    new_shape.append(curr_shape)
                    curr_shape = sh
                else:
                    # A single dim that is already too large stays on its own.
                    new_shape.append(sh)
                    curr_shape = 1
            else:
                curr_shape = temp_shape

        if curr_shape > 1 or len(new_shape) == 0:
            new_shape.append(curr_shape)

        new_grad = grad.reshape(new_shape)
        return new_grad

    @torch.no_grad()
    def step(self, closure=None):
        """
        Performs a single optimization step.

        Arguments:
            closure (`Callable`, *optional*): A closure that reevaluates the model and returns the loss.

        Returns:
            The value returned by `closure`, or `None` when no closure is given.
        """
        loss = None
        if closure is not None:
            # step() runs under no_grad; the closure needs autograd to recompute the loss.
            with torch.enable_grad():
                loss = closure()

        for group in self.param_groups:
            for p in group["params"]:
                if p.grad is None:
                    continue
                grad = p.grad

                state = self.state[p]

                if "step" not in state:
                    state["step"] = 0

                # State initialization
                if "exp_avg" not in state:
                    # Exponential moving average of gradient values
                    state["exp_avg"] = torch.zeros_like(grad)
                    # Exponential moving average of squared gradient values
                    state["exp_avg_sq"] = torch.zeros_like(grad)

                if 'Q' not in state:
                    self.init_preconditioner(
                        grad,
                        state,
                        precondition_frequency=group['precondition_frequency'],
                        precondition_1d=group['precondition_1d'],
                        shampoo_beta=(group['shampoo_beta'] if group['shampoo_beta'] >= 0 else group["betas"][1]),
                        max_precond_dim=group['max_precond_dim'],
                        merge_dims=group["merge_dims"],
                    )
                    self.update_preconditioner(grad, state,
                                               max_precond_dim=group['max_precond_dim'],
                                               merge_dims=group["merge_dims"],
                                               precondition_1d=group["precondition_1d"])
                    continue  # first step is skipped so that we never use the current gradients in the projection.

                # Projecting gradients to the eigenbases of Shampoo's preconditioner
                # i.e. projecting to the eigenbases of matrices in state['GG']
                grad_projected = self.project(grad, state, merge_dims=group["merge_dims"],
                                              max_precond_dim=group['max_precond_dim'])

                exp_avg, exp_avg_sq = state["exp_avg"], state["exp_avg_sq"]
                beta1, beta2 = group["betas"]

                state["step"] += 1

                # Decay the first and second moment running average coefficient
                # In-place operations to update the averages at the same time
                # (first moment lives in the original space, second moment in the projected space)
                exp_avg.mul_(beta1).add_(grad, alpha=(1.0 - beta1))
                exp_avg_sq.mul_(beta2).add_(grad_projected.square(), alpha=(1.0 - beta2))

                denom = exp_avg_sq.sqrt().add_(group["eps"])

                # Projecting the exponential moving average of gradients to the eigenbases of Shampoo's preconditioner
                # i.e. projecting to the eigenbases of matrices in state['GG']
                exp_avg_projected = self.project(exp_avg, state, merge_dims=group["merge_dims"],
                                                 max_precond_dim=group['max_precond_dim'])

                step_size = group["lr"]
                if group["correct_bias"]:
                    bias_correction1 = 1.0 - beta1 ** (state["step"])
                    bias_correction2 = 1.0 - beta2 ** (state["step"])
                    step_size = step_size * (bias_correction2 ** .5) / bias_correction1

                # Projecting back the preconditioned (by Adam) exponential moving average of gradients
                # to the original space
                norm_grad = self.project_back(exp_avg_projected / denom, state, merge_dims=group["merge_dims"],
                                              max_precond_dim=group['max_precond_dim'])

                if group["normalize_grads"]:
                    norm_grad = norm_grad / (1e-30+torch.mean(norm_grad**2)**0.5)

                p.add_(norm_grad, alpha=-step_size)

                # From AdamW code: Just adding the square of the weights to the loss function is *not*
                # the correct way of using L2 regularization/weight decay with Adam,
                # since that will interact with the m and v parameters in strange ways.
                #
                # Instead we want to decay the weights in a manner that doesn't interact
                # with the m/v parameters. This is equivalent to adding the square
                # of the weights to the loss with plain (non-momentum) SGD.
                # Add weight decay at the end (fixed version)
                if group["weight_decay"] > 0.0:
                    p.add_(p, alpha=(-group["lr"] * group["weight_decay"]))

                # Update is done after the gradient step to avoid using current gradients in the projection.
                self.update_preconditioner(grad, state,
                                           max_precond_dim=group['max_precond_dim'],
                                           merge_dims=group["merge_dims"],
                                           precondition_1d=group["precondition_1d"])

        return loss

    def init_preconditioner(self, grad, state, precondition_frequency=10,
                            shampoo_beta=0.95, max_precond_dim=10000, precondition_1d=False,
                            merge_dims=False):
        """
        Initializes the preconditioner matrices (L and R in the paper).

        Fills `state['GG']` with one zero square matrix per (possibly merged) gradient
        dimension, or an empty list for dimensions that are not preconditioned, sets
        `state['Q']` to None and records `precondition_frequency` / `shampoo_beta` in `state`.
        """
        state['GG'] = []  # Will hold all the preconditioner matrices (L and R in the paper).
        if grad.dim() == 1:
            if not precondition_1d or grad.shape[0] > max_precond_dim:
                state['GG'].append([])
            else:
                state['GG'].append(torch.zeros(grad.shape[0], grad.shape[0], device=grad.device))
        else:
            if merge_dims:
                grad = self.merge_dims(grad, max_precond_dim)

            for sh in grad.shape:
                if sh > max_precond_dim:
                    state['GG'].append([])
                else:
                    state['GG'].append(torch.zeros(sh, sh, device=grad.device))

        state['Q'] = None  # Will hold all the eigenbases of the preconditioner.
        state['precondition_frequency'] = precondition_frequency
        state['shampoo_beta'] = shampoo_beta

    def project(self, grad, state, merge_dims=False, max_precond_dim=10000):
        """
        Projects the gradient to the eigenbases of the preconditioner.

        Returns a tensor with the same shape as the input `grad`.
        """
        original_shape = grad.shape
        if merge_dims:
            if grad.dim() == 4 and self._data_format == 'channels_last':
                permuted_shape = grad.permute(0, 3, 1, 2).shape
            grad = self.merge_dims(grad, max_precond_dim)

        # Each iteration consumes dim 0 and appends a dim at the end, so after one pass
        # over all entries of state['Q'] the dimension order is back to the original.
        for mat in state['Q']:
            if len(mat) > 0:
                grad = torch.tensordot(
                    grad,
                    mat,
                    dims=[[0], [0]],
                )
            else:
                permute_order = list(range(1, len(grad.shape))) + [0]
                grad = grad.permute(permute_order)

        if merge_dims:
            if self._data_format == 'channels_last' and len(original_shape) == 4:
                grad = grad.reshape(permuted_shape).permute(0, 2, 3, 1)
            else:
                grad = grad.reshape(original_shape)
        return grad

    def update_preconditioner(self, grad, state,
                              max_precond_dim=10000, merge_dims=False, precondition_1d=False):
        """
        Updates the preconditioner matrices and the eigenbases (L, R, Q_L, Q_R in the paper).

        The matrices in `state['GG']` are moved towards the current gradient outer products
        with weight `1 - state['shampoo_beta']`. `state['Q']` is computed with an
        eigendecomposition the first time, and refreshed with a power-iteration/QR step
        whenever `state['step']` is a positive multiple of `state['precondition_frequency']`.
        """
        if grad.dim() == 1:
            if precondition_1d and grad.shape[0] <= max_precond_dim:
                state['GG'][0].lerp_(grad.unsqueeze(1) @ grad.unsqueeze(0), 1-state['shampoo_beta'])
        else:
            if merge_dims:
                new_grad = self.merge_dims(grad, max_precond_dim)
                for idx, sh in enumerate(new_grad.shape):
                    if sh <= max_precond_dim:
                        outer_product = torch.tensordot(
                            new_grad,
                            new_grad,
                            dims=[[*chain(range(idx), range(idx + 1, len(new_grad.shape)))]] * 2,
                        )
                        state['GG'][idx].lerp_(outer_product, 1-state['shampoo_beta'])
            else:
                for idx, sh in enumerate(grad.shape):
                    if sh <= max_precond_dim:
                        outer_product = torch.tensordot(
                            grad,
                            grad,
                            # Contracts across all dimensions except for k.
                            dims=[[*chain(range(idx), range(idx + 1, len(grad.shape)))]] * 2,
                        )
                        state['GG'][idx].lerp_(outer_product, 1-state['shampoo_beta'])

        if state['Q'] is None:
            state['Q'] = self.get_orthogonal_matrix(state['GG'])
        if state['step'] > 0 and state['step'] % state['precondition_frequency'] == 0:
            state['Q'] = self.get_orthogonal_matrix_QR(state, max_precond_dim, merge_dims)

    def project_back(self, grad, state, merge_dims=False, max_precond_dim=10000):
        """
        Projects the gradient back to the original space.

        Same traversal as `project`, but contracts against dim 1 of each matrix in
        `state['Q']` (i.e. multiplies by the transpose). Returns a tensor with the
        same shape as the input `grad`.
        """
        original_shape = grad.shape
        if merge_dims:
            if self._data_format == 'channels_last' and grad.dim() == 4:
                permuted_shape = grad.permute(0, 3, 1, 2).shape
            grad = self.merge_dims(grad, max_precond_dim)
        for mat in state['Q']:
            if len(mat) > 0:
                grad = torch.tensordot(
                    grad,
                    mat,
                    dims=[[0], [1]],
                )
            else:
                permute_order = list(range(1, len(grad.shape))) + [0]
                grad = grad.permute(permute_order)

        if merge_dims:
            if self._data_format == 'channels_last' and len(original_shape) == 4:
                grad = grad.reshape(permuted_shape).permute(0, 2, 3, 1)
            else:
                grad = grad.reshape(original_shape)
        return grad

    def get_orthogonal_matrix(self, mat):
        """
        Computes the eigenbases of the preconditioner using torch.linalg.eigh decomposition.

        `mat` is a list whose entries are square tensors or empty lists; empty entries are
        passed through as empty lists. The decomposition runs in float32 (float64 as a
        fallback) and each result is cast back to the dtype/device of its own input.
        Eigenvector columns are flipped, i.e. ordered by descending eigenvalue.
        """
        final = []
        for m in mat:
            if len(m) == 0:
                final.append([])
                continue

            # Track dtype/device per matrix so mixed-precision factors are each restored correctly.
            original_type = m.data.dtype
            original_device = m.data.device
            is_float32 = original_type == torch.float
            work = m.data if is_float32 else m.data.float()

            jitter = 1e-30 * torch.eye(work.shape[0], device=work.device)
            try:
                _, Q = torch.linalg.eigh(work + jitter)
            except RuntimeError:
                # torch.linalg errors derive from RuntimeError; retry once in double precision.
                _, Q = torch.linalg.eigh(work.to(torch.float64) + jitter)
                Q = Q.to(work.dtype)
            Q = torch.flip(Q, [1])

            if not is_float32:
                Q = Q.to(original_device).type(original_type)
            final.append(Q)
        return final

    def get_orthogonal_matrix_QR(self, state, max_precond_dim=10000, merge_dims=False):
        """
        Computes the eigenbases of the preconditioner using one round of power iteration
        followed by torch.linalg.qr decomposition.

        Also reorders `state['exp_avg_sq']` along each preconditioned dimension so that it
        stays aligned with the re-sorted eigenvector columns. Returns the new list of bases.
        """
        precond_list = state['GG']
        orth_list = state['Q']

        orig_shape = state['exp_avg_sq'].shape
        if self._data_format == 'channels_last' and len(orig_shape) == 4:
            permuted_shape = state['exp_avg_sq'].permute(0, 3, 1, 2).shape
        if merge_dims:
            exp_avg_sq = self.merge_dims(state['exp_avg_sq'], max_precond_dim)
        else:
            exp_avg_sq = state['exp_avg_sq']

        final = []
        for ind, (m, o) in enumerate(zip(precond_list, orth_list)):
            if len(m) == 0:
                final.append([])
                continue

            # Track dtype/device per matrix so mixed-precision factors are each restored correctly.
            original_type = m.data.dtype
            original_device = m.data.device
            m32 = m.data.float()
            o32 = o.data.float()

            # Estimated eigenvalues in the current basis; sort the basis by them (descending).
            est_eig = torch.diag(o32.T @ m32 @ o32)
            sort_idx = torch.argsort(est_eig, descending=True)
            exp_avg_sq = exp_avg_sq.index_select(ind, sort_idx)
            o32 = o32[:, sort_idx]
            power_iter = m32 @ o32
            Q, _ = torch.linalg.qr(power_iter)

            if original_type != torch.float:
                Q = Q.to(original_device).type(original_type)
            final.append(Q)

        if merge_dims:
            if self._data_format == 'channels_last' and len(orig_shape) == 4:
                exp_avg_sq = exp_avg_sq.reshape(permuted_shape).permute(0, 2, 3, 1)
            else:
                exp_avg_sq = exp_avg_sq.reshape(orig_shape)

        state['exp_avg_sq'] = exp_avg_sq
        return final


import os
import sys
# Read this file's own source via __file__ (sys.argv[0] is only this file when it is the
# entry-point script; __file__ is also correct when the module is imported).
with open(__file__) as f:
    code = f.read()  # read the code of this file ASAP, for logging
import uuid
import glob
import time
from dataclasses import dataclass

import numpy as np
import torch
from torch import nn
import torch.nn.functional as F
import torch.distributed as dist
import torch._inductor.config as config
from torch.nn.parallel import DistributedDataParallel as DDP

# -----------------------------------------------------------------------------
# PyTorch nn.Module definitions for the GPT-2 model

class Rotary(torch.nn.Module):
    """Produces rotary-embedding cos/sin tables, cached for the last sequence length seen."""

    def __init__(self, dim, base=10000):
        super().__init__()
        inv_freq = 1.0 / (base ** (torch.arange(0, dim, 2).float() / dim))
        self.register_buffer("inv_freq", inv_freq)
        self.seq_len_cached = None
        self.cos_cached = None
        self.sin_cached = None

    def forward(self, x):
        """Return `(cos, sin)`, each indexed as [None, :, None, :], for `seq_len = x.shape[1]`."""
        seq_len = x.shape[1]
        if seq_len != self.seq_len_cached:
            # Tables are rebuilt only when the sequence length changes.
            self.seq_len_cached = seq_len
            t = torch.arange(seq_len, device=x.device).type_as(self.inv_freq)
            freqs = torch.outer(t, self.inv_freq).to(x.device)
            self.cos_cached = freqs.cos()
            self.sin_cached = freqs.sin()
        return self.cos_cached[None, :, None, :], self.sin_cached[None, :, None, :]


def apply_rotary_emb(x, cos, sin):
    """Rotate the two halves of the last dimension of a 4D tensor `x` by `cos`/`sin`."""
    assert x.ndim == 4  # multihead attention
    d = x.shape[3]//2
    x1 = x[..., :d]
    x2 = x[..., d:]
    y1 = x1 * cos + x2 * sin
    y2 = x1 * (-sin) + x2 * cos
    return torch.cat([y1, y2], 3)


def rmsnorm(x0, eps=1e-6):
    """RMS-normalize `x0` over its last dimension in float32 and return it in `x0`'s dtype."""
    x = x0.float()
    x = x * torch.rsqrt(x.pow(2).mean(-1, keepdim=True) + eps)
    return x.type_as(x0)


class CausalSelfAttention(nn.Module):
    """Multi-head causal self-attention with rotary embeddings applied to queries and keys."""

    def __init__(self, config):
        super().__init__()
        self.n_head = config.n_head
        self.n_embd = config.n_embd
        self.head_dim = self.n_embd // self.n_head
        assert self.n_embd % self.n_head == 0
        self.c_q = nn.Linear(self.n_embd, self.n_embd, bias=False)
        self.c_k = nn.Linear(self.n_embd, self.n_embd, bias=False)
        self.c_v = nn.Linear(self.n_embd, self.n_embd, bias=False)
        # output projection
        self.c_proj = nn.Linear(self.n_embd, self.n_embd, bias=False)
        self.rotary = Rotary(self.head_dim)

    def forward(self, x):
        B, T, C = x.size()  # batch size, sequence length, embedding dimensionality (n_embd)
        q, k, v = self.c_q(x), self.c_k(x), self.c_v(x)
        k = k.view(B, T, self.n_head, self.head_dim)
        q = q.view(B, T, self.n_head, self.head_dim)
        v = v.view(B, T, self.n_head, self.head_dim)
        cos, sin = self.rotary(q)
        q = apply_rotary_emb(q, cos, sin)
        k = apply_rotary_emb(k, cos, sin)
        y = F.scaled_dot_product_attention(q.transpose(1, 2), k.transpose(1, 2), v.transpose(1, 2), is_causal=True)
        y = y.transpose(1, 2).contiguous().view(B, T, C)  # re-assemble all head outputs side by side
        # output projection
        y = self.c_proj(y)
        return y


class MLP(nn.Module):
    """Two bias-free linear layers (n_embd -> 4*n_embd -> n_embd) with a GELU in between."""

    def __init__(self, config):
        super().__init__()
        self.c_fc = nn.Linear(config.n_embd, 4 * config.n_embd, bias=False)
        self.c_proj = nn.Linear(4 * config.n_embd, config.n_embd, bias=False)

    def forward(self, x):
        x = self.c_fc(x)
        x = F.gelu(x)
        x = self.c_proj(x)
        return x


class Block(nn.Module):
    """Pre-norm transformer block; the attention branch is scaled by 1/sqrt(2*n_layer)."""

    def __init__(self, config):
        super().__init__()
        self.attn = CausalSelfAttention(config)
        self.mlp = MLP(config)
        self.attn_scale = (1 / (2 * config.n_layer)**0.5)

    def forward(self, x):
        x = x + self.attn_scale * self.attn(rmsnorm(x))
        x = x + self.mlp(rmsnorm(x))
        return x

# -----------------------------------------------------------------------------
# The main GPT-2 model

@dataclass
class GPTConfig:
    vocab_size : int = 50257
    n_layer : int = 12
    n_head : int = 12
    n_embd : int = 768


class GPT(nn.Module):
    """Token embedding, a stack of `Block`s, a final rmsnorm and an LM head tied to the embedding."""

    def __init__(self, config):
        super().__init__()
        self.config = config

        self.transformer = nn.ModuleDict(dict(
            wte = nn.Embedding(config.vocab_size, config.n_embd),
            h = nn.ModuleList([Block(config) for _ in range(config.n_layer)]),
        ))
        self.lm_head = nn.Linear(config.n_embd, config.vocab_size, bias=False)
        self.transformer.wte.weight = self.lm_head.weight  # https://paperswithcode.com/method/weight-tying

    def forward(self, idx, targets=None, return_logits=True):
        """
        Run the model on token indices `idx`.

        Returns `(logits, loss)`. With `targets`, logits cover every position and `loss` is
        the cross-entropy (targets equal to -1 are ignored); without `targets`, only the
        last position is projected and `loss` is None. `logits` is None when
        `return_logits` is False.
        """
        b, t = idx.size()  # unpacking requires idx to be 2D; positions come from the rotary embedding

        # forward the GPT model itself
        x = self.transformer.wte(idx)  # token embeddings of shape (b, t, n_embd)

        for block in self.transformer.h:
            x = block(x)
        x = rmsnorm(x)

        if targets is not None:
            # if we are given some desired targets also calculate the loss
            logits = self.lm_head(x)
            logits = logits.float()  # use tf32/fp32 for logits
            loss = F.cross_entropy(logits.view(-1, logits.size(-1)), targets.view(-1), ignore_index=-1)
        else:
            # inference-time mini-optimization: only forward the lm_head on the very last position
            logits = self.lm_head(x[:, [-1], :])  # note: using list [-1] to preserve the time dim
            logits = logits.float()  # use tf32/fp32 for logits
            loss = None

        # there are performance reasons why not returning logits is prudent, if not needed
        if not return_logits:
            logits = None

        return logits, loss

# -----------------------------------------------------------------------------
# Our own simple Distributed Data Loader

def _peek_data_shard(filename):
    """Read only the 256-int32 header of a shard and return its claimed token count."""
    # only reads the header, returns header data
    with open(filename, "rb") as f:
        # first read the header, which is 256 int32 integers (4 bytes each)
        header = np.frombuffer(f.read(256*4), dtype=np.int32)
    if header[0] != 20240520:
        print("ERROR: magic number mismatch in the data .bin file!")
        print("---> HINT: Are you passing in a correct file with --input_bin?")
        print("---> HINT: Dataset encoding changed recently, re-run data prepro or refer again to README")
        print("---> HINT: For example re-run: `python dev/data/tinyshakespeare.py`, then re-try")
        sys.exit(1)
    assert header[1] == 1, "unsupported version"
    ntok = header[2]  # number of tokens (claimed)
    return ntok  # for now just return the number of tokens


def _load_data_shard(filename):
    """Read a whole shard and return its tokens as a uint16 array, validating the header."""
    with open(filename, "rb") as f:
        # first read the header, which is 256 int32 integers (4 bytes each)
        header = np.frombuffer(f.read(256*4), dtype=np.int32)
        assert header[0] == 20240520, "magic number mismatch in the data .bin file"
        assert header[1] == 1, "unsupported version"
        ntok = header[2]  # number of tokens (claimed)
        # the rest of it are tokens, stored as uint16
        tokens = np.frombuffer(f.read(), dtype=np.uint16)
    assert len(tokens) == ntok, "number of tokens read does not match header?"
    return tokens


class DistributedDataLoader:
    """
    Iterates (B, T) input/target batches over token shards, strided across processes.

    Each process starts at offset `process_rank * B * T` in a shard and advances by
    `B * T * num_processes` per batch; shards are visited cyclically in sorted filename order.
    """

    def __init__(self, filename_pattern, B, T, process_rank, num_processes):
        self.process_rank = process_rank
        self.num_processes = num_processes
        self.B = B
        self.T = T

        # glob files that match the pattern
        self.files = sorted(glob.glob(filename_pattern))
        assert len(self.files) > 0, f"did not find any files that match the pattern {filename_pattern}"

        # load and validate all data shards, count number of tokens in total
        ntok_total = 0
        for fname in self.files:
            shard_ntok = _peek_data_shard(fname)
            assert shard_ntok >= num_processes * B * T + 1
            ntok_total += int(shard_ntok)
        self.ntok_total = ntok_total

        # kick things off
        self.reset()

    def reset(self):
        """Go back to the first shard and this process's starting offset."""
        self.current_shard = 0
        self.current_position = self.process_rank * self.B * self.T
        self.tokens = _load_data_shard(self.files[self.current_shard])

    def advance(self):  # advance to next data shard
        self.current_shard = (self.current_shard + 1) % len(self.files)
        self.current_position = self.process_rank * self.B * self.T
        self.tokens = _load_data_shard(self.files[self.current_shard])

    def next_batch(self):
        """Return the next `(inputs, targets)` pair, both of shape (B, T), moved to CUDA."""
        B = self.B
        T = self.T
        buf = self.tokens[self.current_position : self.current_position+B*T+1]
        buf = torch.tensor(buf.astype(np.int32), dtype=torch.long)
        x = (buf[:-1]).view(B, T)  # inputs
        y = (buf[1:]).view(B, T)  # targets
        # advance current position and load next shard if necessary
        self.current_position += B * T * self.num_processes
        if self.current_position + (B * T * self.num_processes + 1) > len(self.tokens):
            self.advance()
        return x.cuda(), y.cuda()
# -----------------------------------------------------------------------------
# int main

@dataclass
class Hyperparameters:
    """Static configuration for this training run (data paths, optimization, eval/logging)."""
    # data hyperparams
    input_bin : str = 'data/fineweb10B/fineweb_train_*.bin' # input .bin to train on
    input_val_bin : str = 'data/fineweb10B/fineweb_val_*.bin' # input .bin to eval validation loss on
    # optimization hyperparams
    batch_size : int = 8*64 # batch size, in sequences, across all devices
    device_batch_size : int = 64 # batch size, in sequences, per device
    sequence_length : int = 1024 # sequence length, in tokens
    num_iterations : int = 6000 # number of iterations to run
    learning_rate : float = 0.0036
    warmup_iters : int = 250
    warmdown_iters : int = 1800 # number of iterations of linear warmup/warmdown for triangular or trapezoidal schedule
    # evaluation and logging hyperparams
    val_loss_every : int = 125 # every how many steps to evaluate val loss? 0 for only at the end
    val_tokens : int = 10485760 # how many tokens of validation data? it's important to keep this fixed for consistent comparisons
    save_every : int = 0 # every how many steps to save the checkpoint? 0 for only at the end
args = Hyperparameters()

# set up DDP (distributed data parallel). torchrun sets this env variable
assert torch.cuda.is_available()
dist.init_process_group(backend='nccl')
ddp_rank = int(os.environ['RANK'])
ddp_local_rank = int(os.environ['LOCAL_RANK'])
ddp_world_size = int(os.environ['WORLD_SIZE'])
device = f'cuda:{ddp_local_rank}'
torch.cuda.set_device(device)
print(f"using device: {device}")
master_process = (ddp_rank == 0) # this process will do logging, checkpointing etc.

# convenience variables
B, T = args.device_batch_size, args.sequence_length
# calculate the number of steps to take in the val loop.
assert args.val_tokens % (B * T * ddp_world_size) == 0
val_steps = args.val_tokens // (B * T * ddp_world_size)
# calculate the steps of gradient accumulation required to attain the desired global batch size.
assert args.batch_size % (B * ddp_world_size) == 0
train_accumulation_steps = args.batch_size // (B * ddp_world_size)

# load tokens
train_loader = DistributedDataLoader(args.input_bin, B, T, ddp_rank, ddp_world_size)
val_loader = DistributedDataLoader(args.input_val_bin, B, T, ddp_rank, ddp_world_size)
if master_process:
    print(f"Training DataLoader: total number of tokens: {train_loader.ntok_total} across {len(train_loader.files)} files")
    print(f"Validation DataLoader: total number of tokens: {val_loader.ntok_total} across {len(val_loader.files)} files")
# prefetch the first training batch; the training loop fetches the next one right after each forward pass
x, y = train_loader.next_batch()

# init the model from scratch
num_vocab = 50257
model = GPT(GPTConfig(vocab_size=num_vocab, n_layer=12, n_head=12, n_embd=768))
model = model.cuda()
if hasattr(config, "coordinate_descent_tuning"):
    config.coordinate_descent_tuning = True # suggested by @Chillee
model = torch.compile(model)
# here we wrap model into DDP container
model = DDP(model, device_ids=[ddp_local_rank])
raw_model = model.module # always contains the "raw" unwrapped model
ctx = torch.amp.autocast(device_type='cuda', dtype=torch.bfloat16)

# init the optimizer(s)
# AdamW drives lm_head's parameters; SOAP drives the transformer blocks at half the base learning rate.
optimizer1 = torch.optim.AdamW(raw_model.lm_head.parameters(), lr=args.learning_rate, betas=(0.9, 0.95), weight_decay=0, fused=True)
optimizer2 = SOAP(raw_model.transformer.h.parameters(), lr=0.5*args.learning_rate, betas=(.95, .95), weight_decay=0, precondition_frequency=10)
optimizers = [optimizer1, optimizer2]
# learning rate decay scheduler (linear warmup and warmdown)
def get_lr(it):
    """Return the learning-rate multiplier for iteration `it` (passed to LambdaLR below)."""
    assert it <= args.num_iterations
    # 1) linear warmup for warmup_iters steps
    if it < args.warmup_iters:
        return (it+1) / args.warmup_iters
    # 2) constant lr for a while
    elif it < args.num_iterations - args.warmdown_iters:
        return 1.0
    # 3) linear warmdown
    else:
        decay_ratio = (args.num_iterations - it) / args.warmdown_iters
        return decay_ratio
schedulers = [torch.optim.lr_scheduler.LambdaLR(opt, get_lr) for opt in optimizers]

# begin logging
if master_process:
    run_id = str(uuid.uuid4())
    logdir = 'logs/%s/' % run_id
    os.makedirs(logdir, exist_ok=True)
    logfile = 'logs/%s.txt' % run_id
    # create the log file
    with open(logfile, "w") as f:
        # begin the log by printing this file (the Python code)
        f.write('='*100 + '\n')
        f.write(code)
        f.write('='*100 + '\n')
        # log information about the hardware/software environment this is running on
        # and print the full `nvidia-smi` to file
        f.write(f"Running pytorch {torch.version.__version__} compiled for CUDA {torch.version.cuda}\nnvidia-smi:\n")
        import subprocess
        result = subprocess.run(['nvidia-smi'], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
        f.write(f'{result.stdout}\n')
        f.write('='*100 + '\n')

training_time_ms = 0
# start the clock
torch.cuda.synchronize()
t0 = time.time()
# begin training
train_loader.reset()
for step in range(args.num_iterations + 1):
    last_step = (step == args.num_iterations)
    # This effectively ignores timing first 10 steps, which are slower for weird reasons.
    # Alternately, and slightly more correctly in terms of benchmarking, we could do 10
    # steps with dummy data first, and then re-initialize the model and reset the loader.
    if step == 10:
        training_time_ms = 0
        t0 = time.time()
    # step_avg is reported as nan until enough timed steps exist for the divisions below
    timed_steps = float('nan') if step <= 11 else (step - 10) + 1 # <= 11 to avoid bug in val

    # once in a while evaluate the validation dataset
    if (last_step or (args.val_loss_every > 0 and step % args.val_loss_every == 0)):
        # stop the clock
        torch.cuda.synchronize()
        training_time_ms += 1000 * (time.time() - t0)
        # run validation batches
        model.eval()
        val_loader.reset()
        val_loss = 0.0
        for _ in range(val_steps):
            x_val, y_val = val_loader.next_batch()
            with torch.no_grad(): # of course, we'd like to use ctx here too, but that creates a torch.compile error for some reason
                _, loss = model(x_val, y_val, return_logits=False)
                val_loss += loss
        # average the summed loss across ranks, then across validation steps
        dist.all_reduce(val_loss, op=dist.ReduceOp.AVG)
        val_loss /= val_steps
        # log val loss to console and to logfile
        if master_process:
            print(f'step:{step}/{args.num_iterations} val_loss:{val_loss:.4f} train_time:{training_time_ms:.0f}ms step_avg:{training_time_ms/(timed_steps-1):.2f}ms')
            with open(logfile, "a") as f:
                f.write(f'step:{step}/{args.num_iterations} val_loss:{val_loss:.4f} train_time:{training_time_ms:.0f}ms step_avg:{training_time_ms/(timed_steps-1):.2f}ms\n')
        # start the clock again
        torch.cuda.synchronize()
        t0 = time.time()

    if master_process and (last_step or (args.save_every > 0 and step % args.save_every == 0)):
        # stop the clock
        torch.cuda.synchronize()
        training_time_ms += 1000 * (time.time() - t0)
        # save the state of the training process
        log = dict(step=step, code=code, model=raw_model.state_dict(), optimizers=[opt.state_dict() for opt in optimizers])
        torch.save(log, 'logs/%s/state_step%06d.pt' % (run_id, step))
        # start the clock again
        torch.cuda.synchronize()
        t0 = time.time()

    # bit confusing: we want to make sure to eval on 0th iteration
    # but also after the very last iteration. so we loop for step <= num_iterations
    # instead of just < num_iterations (one extra due to <=), only to do
    # the validation/sampling one last time, and then we break right here as we're done.
    if last_step:
        break

    # --------------- TRAINING SECTION BEGIN -----------------
    model.train()
    for i in range(1, train_accumulation_steps+1):
        # forward pass
        with ctx:
            _, loss = model(x, y, return_logits=False)
            train_loss = loss.detach()
        # advance the dataset for the next batch
        x, y = train_loader.next_batch()
        # backward pass
        if i < train_accumulation_steps:
            with model.no_sync(): # there's no need to sync gradients every accumulation step
                loss.backward()
        else:
            loss.backward() # just sync on the last step
    # gradients were summed over the accumulation steps; turn them into a mean
    for p in model.parameters():
        p.grad /= train_accumulation_steps
    # step the optimizers and schedulers
    for opt, sched in zip(optimizers, schedulers):
        opt.step()
        sched.step()
    # null the gradients
    model.zero_grad(set_to_none=True)
    # --------------- TRAINING SECTION END -------------------
    # everything that follows now is just diagnostics, prints, logging, etc.

    #dist.all_reduce(train_loss, op=dist.ReduceOp.AVG) # all-reducing the training loss would be more correct in terms of logging, but slower
    if master_process:
        # the clock is not stopped here, so this is an estimate that includes the time since t0
        approx_time = training_time_ms + 1000 * (time.time() - t0)
        print(f"step:{step+1}/{args.num_iterations} train_loss:{train_loss.item():.4f} train_time:{approx_time:.0f}ms step_avg:{approx_time/timed_steps:.2f}ms")
        with open(logfile, "a") as f:
            f.write(f"step:{step+1}/{args.num_iterations} train_loss:{train_loss.item():.4f} train_time:{approx_time:.0f}ms step_avg:{approx_time/timed_steps:.2f}ms\n")

if master_process:
    print(f"peak memory consumption: {torch.cuda.max_memory_allocated() // 1024 // 1024} MiB")

# -------------------------------------------------------------------------
# clean up nice
dist.destroy_process_group()

# NOTE(review): the script ends above. The remainder of this dump line is the start of the
# captured run log (not code); it is preserved verbatim in the comment below.
# ==================================================================================================== Running pytorch 2.4.1+cu121 compiled for CUDA 12.1 nvidia-smi: Wed Oct 9 18:10:50 2024 +---------------------------------------------------------------------------------------+ | NVIDIA-SMI 535.129.03 Driver Version: 535.129.03 CUDA Version:
12.2 | |-----------------------------------------+----------------------+----------------------+ | GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC | | Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. | | | | MIG M. | |=========================================+======================+======================| | 0 NVIDIA H100 80GB HBM3 On | 00000000:61:00.0 Off | 0 | | N/A 30C P0 113W / 700W | 5789MiB / 81559MiB | 5% Default | | | | Disabled | +-----------------------------------------+----------------------+----------------------+ | 1 NVIDIA H100 80GB HBM3 On | 00000000:62:00.0 Off | 0 | | N/A 38C P0 119W / 700W | 5837MiB / 81559MiB | 9% Default | | | | Disabled | +-----------------------------------------+----------------------+----------------------+ | 2 NVIDIA H100 80GB HBM3 On | 00000000:63:00.0 Off | 0 | | N/A 39C P0 118W / 700W | 5837MiB / 81559MiB | 7% Default | | | | Disabled | +-----------------------------------------+----------------------+----------------------+ | 3 NVIDIA H100 80GB HBM3 On | 00000000:64:00.0 Off | 0 | | N/A 31C P0 110W / 700W | 5837MiB / 81559MiB | 4% Default | | | | Disabled | +-----------------------------------------+----------------------+----------------------+ | 4 NVIDIA H100 80GB HBM3 On | 00000000:6A:00.0 Off | 0 | | N/A 33C P0 120W / 700W | 5837MiB / 81559MiB | 8% Default | | | | Disabled | +-----------------------------------------+----------------------+----------------------+ | 5 NVIDIA H100 80GB HBM3 On | 00000000:6B:00.0 Off | 0 | | N/A 39C P0 123W / 700W | 5837MiB / 81559MiB | 2% Default | | | | Disabled | +-----------------------------------------+----------------------+----------------------+ | 6 NVIDIA H100 80GB HBM3 On | 00000000:6C:00.0 Off | 0 | | N/A 39C P0 114W / 700W | 5837MiB / 81559MiB | 2% Default | | | | Disabled | +-----------------------------------------+----------------------+----------------------+ | 7 NVIDIA H100 80GB HBM3 On | 00000000:6D:00.0 Off | 0 | | N/A 30C P0 113W / 
700W | 5597MiB / 81559MiB | 0% Default | | | | Disabled | +-----------------------------------------+----------------------+----------------------+ +---------------------------------------------------------------------------------------+ | Processes: | | GPU GI CI PID Type Process name GPU Memory | | ID ID Usage | |=======================================================================================| | 0 N/A N/A 51724 C /usr/bin/python3 5776MiB | | 1 N/A N/A 51725 C /usr/bin/python3 5824MiB | | 2 N/A N/A 51726 C /usr/bin/python3 5824MiB | | 3 N/A N/A 51727 C /usr/bin/python3 5824MiB | | 4 N/A N/A 51728 C /usr/bin/python3 5824MiB | | 5 N/A N/A 51729 C /usr/bin/python3 5824MiB | | 6 N/A N/A 51730 C /usr/bin/python3 5824MiB | | 7 N/A N/A 51731 C /usr/bin/python3 5584MiB | +---------------------------------------------------------------------------------------+ ==================================================================================================== step:0/6000 val_loss:11.0149 train_time:305ms step_avg:nanms step:1/6000 train_loss:11.0091 train_time:24179ms step_avg:nanms step:2/6000 train_loss:11.0001 train_time:24523ms step_avg:nanms step:3/6000 train_loss:10.9840 train_time:24777ms step_avg:nanms step:4/6000 train_loss:10.9560 train_time:25031ms step_avg:nanms step:5/6000 train_loss:10.9093 train_time:25286ms step_avg:nanms step:6/6000 train_loss:10.8532 train_time:25542ms step_avg:nanms step:7/6000 train_loss:10.7702 train_time:25798ms step_avg:nanms step:8/6000 train_loss:10.7071 train_time:26052ms step_avg:nanms step:9/6000 train_loss:10.5679 train_time:26307ms step_avg:nanms step:10/6000 train_loss:10.4348 train_time:26562ms step_avg:nanms step:11/6000 train_loss:10.2423 train_time:1539ms step_avg:nanms step:12/6000 train_loss:10.0687 train_time:1796ms step_avg:nanms step:13/6000 train_loss:9.8811 train_time:2049ms step_avg:682.95ms step:14/6000 train_loss:9.7510 train_time:2302ms step_avg:575.53ms step:15/6000 train_loss:9.5861 train_time:2556ms 
step_avg:511.21ms step:16/6000 train_loss:9.4083 train_time:2811ms step_avg:468.48ms step:17/6000 train_loss:9.2188 train_time:3064ms step_avg:437.76ms step:18/6000 train_loss:9.0430 train_time:3318ms step_avg:414.77ms step:19/6000 train_loss:8.7853 train_time:3572ms step_avg:396.86ms step:20/6000 train_loss:8.5715 train_time:3825ms step_avg:382.52ms step:21/6000 train_loss:8.2020 train_time:5150ms step_avg:468.15ms step:22/6000 train_loss:8.2182 train_time:5401ms step_avg:450.08ms step:23/6000 train_loss:8.2195 train_time:5654ms step_avg:434.90ms step:24/6000 train_loss:7.8914 train_time:5908ms step_avg:422.01ms step:25/6000 train_loss:7.8499 train_time:6162ms step_avg:410.79ms step:26/6000 train_loss:7.5862 train_time:6416ms step_avg:401.01ms step:27/6000 train_loss:7.4062 train_time:6670ms step_avg:392.36ms step:28/6000 train_loss:7.4215 train_time:6925ms step_avg:384.74ms step:29/6000 train_loss:7.1487 train_time:7179ms step_avg:377.83ms step:30/6000 train_loss:7.3110 train_time:7433ms step_avg:371.64ms step:31/6000 train_loss:7.1672 train_time:8759ms step_avg:417.07ms step:32/6000 train_loss:7.0884 train_time:9011ms step_avg:409.61ms step:33/6000 train_loss:6.8866 train_time:9264ms step_avg:402.78ms step:34/6000 train_loss:7.3137 train_time:9518ms step_avg:396.59ms step:35/6000 train_loss:7.0681 train_time:9772ms step_avg:390.87ms step:36/6000 train_loss:7.2317 train_time:10026ms step_avg:385.61ms step:37/6000 train_loss:7.1573 train_time:10280ms step_avg:380.73ms step:38/6000 train_loss:6.9996 train_time:10534ms step_avg:376.23ms step:39/6000 train_loss:6.8421 train_time:10789ms step_avg:372.03ms step:40/6000 train_loss:6.9201 train_time:11043ms step_avg:368.11ms step:41/6000 train_loss:6.7763 train_time:12370ms step_avg:399.02ms step:42/6000 train_loss:6.8024 train_time:12622ms step_avg:394.43ms step:43/6000 train_loss:6.6028 train_time:12875ms step_avg:390.15ms step:44/6000 train_loss:6.6912 train_time:13130ms step_avg:386.17ms step:45/6000 
train_loss:6.6638 train_time:13383ms step_avg:382.38ms step:46/6000 train_loss:6.8520 train_time:13637ms step_avg:378.81ms step:47/6000 train_loss:6.6392 train_time:13893ms step_avg:375.49ms step:48/6000 train_loss:6.4572 train_time:14148ms step_avg:372.32ms step:49/6000 train_loss:6.7026 train_time:14402ms step_avg:369.28ms step:50/6000 train_loss:6.5423 train_time:14655ms step_avg:366.38ms step:51/6000 train_loss:6.6742 train_time:15982ms step_avg:389.81ms step:52/6000 train_loss:6.5286 train_time:16233ms step_avg:386.51ms step:53/6000 train_loss:6.3369 train_time:16488ms step_avg:383.45ms step:54/6000 train_loss:6.4820 train_time:16743ms step_avg:380.52ms step:55/6000 train_loss:6.3986 train_time:16996ms step_avg:377.69ms step:56/6000 train_loss:6.7026 train_time:17250ms step_avg:375.00ms step:57/6000 train_loss:6.3628 train_time:17504ms step_avg:372.43ms step:58/6000 train_loss:6.2367 train_time:17758ms step_avg:369.96ms step:59/6000 train_loss:6.4156 train_time:18015ms step_avg:367.64ms step:60/6000 train_loss:6.3103 train_time:18269ms step_avg:365.37ms step:61/6000 train_loss:6.4281 train_time:19596ms step_avg:384.23ms step:62/6000 train_loss:6.2143 train_time:19849ms step_avg:381.72ms step:63/6000 train_loss:6.3072 train_time:20103ms step_avg:379.31ms step:64/6000 train_loss:6.2447 train_time:20357ms step_avg:376.97ms step:65/6000 train_loss:6.5712 train_time:20613ms step_avg:374.79ms step:66/6000 train_loss:6.0816 train_time:20867ms step_avg:372.63ms step:67/6000 train_loss:6.2450 train_time:21122ms step_avg:370.56ms step:68/6000 train_loss:6.0976 train_time:21377ms step_avg:368.57ms step:69/6000 train_loss:6.4423 train_time:21631ms step_avg:366.63ms step:70/6000 train_loss:6.0097 train_time:21886ms step_avg:364.76ms step:71/6000 train_loss:6.0427 train_time:23214ms step_avg:380.56ms step:72/6000 train_loss:6.2715 train_time:23465ms step_avg:378.46ms step:73/6000 train_loss:6.1939 train_time:23720ms step_avg:376.50ms step:74/6000 train_loss:6.0827 
train_time:23974ms step_avg:374.59ms step:75/6000 train_loss:6.1740 train_time:24229ms step_avg:372.75ms step:76/6000 train_loss:6.1444 train_time:24484ms step_avg:370.97ms step:77/6000 train_loss:6.1265 train_time:24738ms step_avg:369.22ms step:78/6000 train_loss:6.1888 train_time:24993ms step_avg:367.54ms step:79/6000 train_loss:6.2506 train_time:25248ms step_avg:365.92ms step:80/6000 train_loss:6.1335 train_time:25504ms step_avg:364.34ms step:81/6000 train_loss:6.1727 train_time:26829ms step_avg:377.87ms step:82/6000 train_loss:5.9197 train_time:27080ms step_avg:376.11ms step:83/6000 train_loss:6.1121 train_time:27335ms step_avg:374.45ms step:84/6000 train_loss:6.0827 train_time:27589ms step_avg:372.83ms step:85/6000 train_loss:6.0145 train_time:27844ms step_avg:371.25ms step:86/6000 train_loss:5.8746 train_time:28099ms step_avg:369.72ms step:87/6000 train_loss:6.0875 train_time:28353ms step_avg:368.22ms step:88/6000 train_loss:6.0025 train_time:28608ms step_avg:366.76ms step:89/6000 train_loss:6.0868 train_time:28862ms step_avg:365.34ms step:90/6000 train_loss:6.0603 train_time:29117ms step_avg:363.96ms step:91/6000 train_loss:5.9667 train_time:30444ms step_avg:375.85ms step:92/6000 train_loss:5.9623 train_time:30695ms step_avg:374.33ms step:93/6000 train_loss:6.0465 train_time:30949ms step_avg:372.88ms step:94/6000 train_loss:5.9184 train_time:31203ms step_avg:371.47ms step:95/6000 train_loss:5.9093 train_time:31457ms step_avg:370.08ms step:96/6000 train_loss:5.9115 train_time:31712ms step_avg:368.75ms step:97/6000 train_loss:5.8290 train_time:31967ms step_avg:367.44ms step:98/6000 train_loss:5.9103 train_time:32222ms step_avg:366.16ms step:99/6000 train_loss:5.8171 train_time:32476ms step_avg:364.90ms step:100/6000 train_loss:5.9655 train_time:32731ms step_avg:363.68ms step:101/6000 train_loss:5.9094 train_time:34057ms step_avg:374.25ms step:102/6000 train_loss:5.8104 train_time:34310ms step_avg:372.94ms step:103/6000 train_loss:5.9148 train_time:34564ms 
step_avg:371.65ms step:104/6000 train_loss:5.9029 train_time:34819ms step_avg:370.41ms step:105/6000 train_loss:5.6871 train_time:35073ms step_avg:369.19ms step:106/6000 train_loss:5.8425 train_time:35327ms step_avg:367.99ms step:107/6000 train_loss:6.0356 train_time:35583ms step_avg:366.83ms step:108/6000 train_loss:5.8387 train_time:35837ms step_avg:365.69ms step:109/6000 train_loss:5.5621 train_time:36093ms step_avg:364.57ms step:110/6000 train_loss:5.7987 train_time:36348ms step_avg:363.48ms step:111/6000 train_loss:5.7502 train_time:37673ms step_avg:373.00ms step:112/6000 train_loss:5.7363 train_time:37926ms step_avg:371.82ms step:113/6000 train_loss:5.8215 train_time:38181ms step_avg:370.69ms step:114/6000 train_loss:5.7681 train_time:38436ms step_avg:369.57ms step:115/6000 train_loss:5.6168 train_time:38690ms step_avg:368.48ms step:116/6000 train_loss:5.8048 train_time:38945ms step_avg:367.41ms step:117/6000 train_loss:5.6318 train_time:39200ms step_avg:366.36ms step:118/6000 train_loss:5.6412 train_time:39454ms step_avg:365.32ms step:119/6000 train_loss:5.7174 train_time:39713ms step_avg:364.34ms step:120/6000 train_loss:5.7469 train_time:39968ms step_avg:363.35ms step:121/6000 train_loss:5.6614 train_time:41294ms step_avg:372.02ms step:122/6000 train_loss:5.5687 train_time:41547ms step_avg:370.95ms step:123/6000 train_loss:5.6577 train_time:41802ms step_avg:369.93ms step:124/6000 train_loss:5.5010 train_time:42056ms step_avg:368.91ms step:125/6000 train_loss:5.8114 train_time:42310ms step_avg:367.91ms step:125/6000 val_loss:5.6402 train_time:42345ms step_avg:368.21ms step:126/6000 train_loss:5.6531 train_time:42571ms step_avg:366.99ms step:127/6000 train_loss:5.6306 train_time:42825ms step_avg:366.03ms step:128/6000 train_loss:5.7053 train_time:43081ms step_avg:365.10ms step:129/6000 train_loss:5.5463 train_time:43336ms step_avg:364.17ms step:130/6000 train_loss:5.7996 train_time:43592ms step_avg:363.27ms step:131/6000 train_loss:5.6047 train_time:44919ms 
step_avg:371.23ms step:132/6000 train_loss:5.6038 train_time:45170ms step_avg:370.25ms step:133/6000 train_loss:5.5227 train_time:45424ms step_avg:369.30ms step:134/6000 train_loss:5.5778 train_time:45681ms step_avg:368.40ms step:135/6000 train_loss:5.5191 train_time:45936ms step_avg:367.49ms step:136/6000 train_loss:5.5573 train_time:46192ms step_avg:366.60ms step:137/6000 train_loss:5.3690 train_time:46446ms step_avg:365.72ms step:138/6000 train_loss:5.5209 train_time:46703ms step_avg:364.87ms step:139/6000 train_loss:5.4803 train_time:46959ms step_avg:364.02ms step:140/6000 train_loss:5.4862 train_time:47213ms step_avg:363.18ms step:141/6000 train_loss:5.5063 train_time:48540ms step_avg:370.53ms step:142/6000 train_loss:5.4300 train_time:48793ms step_avg:369.65ms step:143/6000 train_loss:5.4969 train_time:49048ms step_avg:368.78ms step:144/6000 train_loss:5.2897 train_time:49302ms step_avg:367.93ms step:145/6000 train_loss:5.4569 train_time:49557ms step_avg:367.09ms step:146/6000 train_loss:5.3947 train_time:49811ms step_avg:366.26ms step:147/6000 train_loss:5.3114 train_time:50066ms step_avg:365.44ms step:148/6000 train_loss:5.4200 train_time:50320ms step_avg:364.64ms step:149/6000 train_loss:5.3768 train_time:50576ms step_avg:363.86ms step:150/6000 train_loss:5.4294 train_time:50830ms step_avg:363.07ms step:151/6000 train_loss:5.4273 train_time:52157ms step_avg:369.90ms step:152/6000 train_loss:5.3703 train_time:52408ms step_avg:369.07ms step:153/6000 train_loss:5.3351 train_time:52663ms step_avg:368.27ms step:154/6000 train_loss:5.4063 train_time:52918ms step_avg:367.48ms step:155/6000 train_loss:5.3316 train_time:53173ms step_avg:366.71ms step:156/6000 train_loss:5.3206 train_time:53428ms step_avg:365.94ms step:157/6000 train_loss:5.3173 train_time:53685ms step_avg:365.21ms step:158/6000 train_loss:5.4473 train_time:53940ms step_avg:364.46ms step:159/6000 train_loss:5.2277 train_time:54194ms step_avg:363.72ms step:160/6000 train_loss:5.2872 
train_time:54449ms step_avg:362.99ms step:161/6000 train_loss:5.1428 train_time:55776ms step_avg:369.37ms step:162/6000 train_loss:5.2751 train_time:56027ms step_avg:368.60ms step:163/6000 train_loss:5.3074 train_time:56282ms step_avg:367.86ms step:164/6000 train_loss:5.2798 train_time:56536ms step_avg:367.12ms step:165/6000 train_loss:5.1078 train_time:56792ms step_avg:366.40ms step:166/6000 train_loss:5.2281 train_time:57046ms step_avg:365.68ms step:167/6000 train_loss:5.3608 train_time:57301ms step_avg:364.98ms step:168/6000 train_loss:5.1498 train_time:57557ms step_avg:364.28ms step:169/6000 train_loss:5.2199 train_time:57811ms step_avg:363.59ms step:170/6000 train_loss:5.0916 train_time:58066ms step_avg:362.91ms step:171/6000 train_loss:5.0212 train_time:59393ms step_avg:368.90ms step:172/6000 train_loss:5.1327 train_time:59644ms step_avg:368.17ms step:173/6000 train_loss:5.0972 train_time:59899ms step_avg:367.48ms step:174/6000 train_loss:5.1478 train_time:60154ms step_avg:366.79ms step:175/6000 train_loss:5.2778 train_time:60407ms step_avg:366.10ms step:176/6000 train_loss:5.1680 train_time:60662ms step_avg:365.44ms step:177/6000 train_loss:5.0107 train_time:60917ms step_avg:364.77ms step:178/6000 train_loss:4.9922 train_time:61173ms step_avg:364.12ms step:179/6000 train_loss:5.0179 train_time:61427ms step_avg:363.47ms step:180/6000 train_loss:5.0609 train_time:61683ms step_avg:362.84ms step:181/6000 train_loss:5.0356 train_time:63009ms step_avg:368.47ms step:182/6000 train_loss:5.1515 train_time:63262ms step_avg:367.80ms step:183/6000 train_loss:5.0427 train_time:63515ms step_avg:367.14ms step:184/6000 train_loss:4.9538 train_time:63770ms step_avg:366.49ms step:185/6000 train_loss:4.9792 train_time:64024ms step_avg:365.85ms step:186/6000 train_loss:5.1032 train_time:64278ms step_avg:365.22ms step:187/6000 train_loss:4.9836 train_time:64533ms step_avg:364.59ms step:188/6000 train_loss:5.2250 train_time:64787ms step_avg:363.97ms step:189/6000 
train_loss:5.0166 train_time:65178ms step_avg:364.12ms step:190/6000 train_loss:4.9305 train_time:65595ms step_avg:364.42ms step:191/6000 train_loss:5.1003 train_time:66920ms step_avg:369.73ms step:192/6000 train_loss:4.9391 train_time:67174ms step_avg:369.09ms step:193/6000 train_loss:4.8408 train_time:67428ms step_avg:368.46ms step:194/6000 train_loss:5.0318 train_time:67683ms step_avg:367.84ms step:195/6000 train_loss:4.9867 train_time:67937ms step_avg:367.23ms step:196/6000 train_loss:5.1711 train_time:68192ms step_avg:366.62ms step:197/6000 train_loss:5.0669 train_time:68447ms step_avg:366.03ms step:198/6000 train_loss:4.8918 train_time:68702ms step_avg:365.44ms step:199/6000 train_loss:4.9360 train_time:68957ms step_avg:364.85ms step:200/6000 train_loss:4.8234 train_time:69213ms step_avg:364.28ms step:201/6000 train_loss:4.8992 train_time:70539ms step_avg:369.31ms step:202/6000 train_loss:4.8157 train_time:70793ms step_avg:368.71ms step:203/6000 train_loss:5.0449 train_time:71047ms step_avg:368.12ms step:204/6000 train_loss:4.9609 train_time:71302ms step_avg:367.53ms step:205/6000 train_loss:4.9075 train_time:71557ms step_avg:366.96ms step:206/6000 train_loss:5.0643 train_time:71812ms step_avg:366.39ms step:207/6000 train_loss:4.7360 train_time:72067ms step_avg:365.82ms step:208/6000 train_loss:4.8821 train_time:72321ms step_avg:365.26ms step:209/6000 train_loss:4.8366 train_time:72577ms step_avg:364.71ms step:210/6000 train_loss:5.0043 train_time:72831ms step_avg:364.16ms step:211/6000 train_loss:4.9239 train_time:74157ms step_avg:368.94ms step:212/6000 train_loss:4.8044 train_time:74410ms step_avg:368.37ms step:213/6000 train_loss:4.9524 train_time:74664ms step_avg:367.80ms step:214/6000 train_loss:4.7672 train_time:74917ms step_avg:367.24ms step:215/6000 train_loss:4.8557 train_time:75172ms step_avg:366.69ms step:216/6000 train_loss:4.7127 train_time:75427ms step_avg:366.15ms step:217/6000 train_loss:4.8568 train_time:75682ms step_avg:365.61ms 
step:218/6000 train_loss:4.8064 train_time:75937ms step_avg:365.08ms step:219/6000 train_loss:4.7861 train_time:76191ms step_avg:364.55ms step:220/6000 train_loss:4.8014 train_time:76446ms step_avg:364.03ms step:221/6000 train_loss:4.8319 train_time:77774ms step_avg:368.60ms step:222/6000 train_loss:4.8711 train_time:78024ms step_avg:368.04ms step:223/6000 train_loss:4.7985 train_time:78279ms step_avg:367.51ms step:224/6000 train_loss:4.8030 train_time:78534ms step_avg:366.98ms step:225/6000 train_loss:4.9212 train_time:78789ms step_avg:366.46ms step:226/6000 train_loss:4.6555 train_time:79044ms step_avg:365.94ms step:227/6000 train_loss:4.6893 train_time:79299ms step_avg:365.43ms step:228/6000 train_loss:4.6743 train_time:79555ms step_avg:364.93ms step:229/6000 train_loss:4.8447 train_time:79809ms step_avg:364.43ms step:230/6000 train_loss:4.6810 train_time:80065ms step_avg:363.93ms step:231/6000 train_loss:4.8108 train_time:81391ms step_avg:368.29ms step:232/6000 train_loss:4.6661 train_time:81644ms step_avg:367.76ms step:233/6000 train_loss:4.6326 train_time:81899ms step_avg:367.26ms step:234/6000 train_loss:4.8332 train_time:82154ms step_avg:366.76ms step:235/6000 train_loss:4.6774 train_time:82408ms step_avg:366.26ms step:236/6000 train_loss:4.5779 train_time:82663ms step_avg:365.76ms step:237/6000 train_loss:4.8505 train_time:82917ms step_avg:365.27ms step:238/6000 train_loss:4.7458 train_time:83171ms step_avg:364.79ms step:239/6000 train_loss:4.6419 train_time:83426ms step_avg:364.31ms step:240/6000 train_loss:4.7907 train_time:83681ms step_avg:363.83ms step:241/6000 train_loss:4.7721 train_time:85008ms step_avg:368.00ms step:242/6000 train_loss:4.6648 train_time:85261ms step_avg:367.50ms step:243/6000 train_loss:4.8297 train_time:85515ms step_avg:367.02ms step:244/6000 train_loss:4.6623 train_time:85770ms step_avg:366.54ms step:245/6000 train_loss:4.6783 train_time:86026ms step_avg:366.07ms step:246/6000 train_loss:4.7539 train_time:86281ms 
step_avg:365.60ms step:247/6000 train_loss:4.6973 train_time:86535ms step_avg:365.13ms step:248/6000 train_loss:4.6430 train_time:86790ms step_avg:364.66ms step:249/6000 train_loss:4.8146 train_time:87045ms step_avg:364.20ms step:250/6000 train_loss:4.5445 train_time:87300ms step_avg:363.75ms step:250/6000 val_loss:4.6476 train_time:87335ms step_avg:363.90ms step:251/6000 train_loss:4.5773 train_time:88630ms step_avg:367.76ms step:252/6000 train_loss:4.7066 train_time:88880ms step_avg:367.27ms step:253/6000 train_loss:4.7037 train_time:89135ms step_avg:366.81ms step:254/6000 train_loss:4.5615 train_time:89390ms step_avg:366.35ms step:255/6000 train_loss:4.5464 train_time:89645ms step_avg:365.90ms step:256/6000 train_loss:4.7082 train_time:89901ms step_avg:365.45ms step:257/6000 train_loss:4.6305 train_time:90155ms step_avg:365.00ms step:258/6000 train_loss:4.6148 train_time:90411ms step_avg:364.56ms step:259/6000 train_loss:4.5526 train_time:90665ms step_avg:364.12ms step:260/6000 train_loss:4.5803 train_time:90921ms step_avg:363.68ms step:261/6000 train_loss:4.6363 train_time:92247ms step_avg:367.52ms step:262/6000 train_loss:4.6223 train_time:92501ms step_avg:367.07ms step:263/6000 train_loss:4.5488 train_time:92755ms step_avg:366.62ms step:264/6000 train_loss:4.4761 train_time:93010ms step_avg:366.18ms step:265/6000 train_loss:4.5463 train_time:93265ms step_avg:365.74ms step:266/6000 train_loss:4.3892 train_time:93521ms step_avg:365.32ms step:267/6000 train_loss:4.4675 train_time:93775ms step_avg:364.88ms step:268/6000 train_loss:4.4920 train_time:94031ms step_avg:364.46ms step:269/6000 train_loss:4.4711 train_time:94285ms step_avg:364.04ms step:270/6000 train_loss:4.4200 train_time:94541ms step_avg:363.62ms step:271/6000 train_loss:4.6400 train_time:95867ms step_avg:367.31ms step:272/6000 train_loss:4.5479 train_time:96121ms step_avg:366.88ms step:273/6000 train_loss:4.4276 train_time:96375ms step_avg:366.44ms step:274/6000 train_loss:4.4757 train_time:96631ms 
step_avg:366.03ms step:275/6000 train_loss:4.5613 train_time:96885ms step_avg:365.60ms step:276/6000 train_loss:4.5758 train_time:97138ms step_avg:365.18ms step:277/6000 train_loss:4.7663 train_time:97393ms step_avg:364.77ms step:278/6000 train_loss:4.5357 train_time:97648ms step_avg:364.36ms step:279/6000 train_loss:4.6308 train_time:97903ms step_avg:363.95ms step:280/6000 train_loss:4.5055 train_time:98158ms step_avg:363.55ms step:281/6000 train_loss:4.5766 train_time:99484ms step_avg:367.10ms step:282/6000 train_loss:4.4674 train_time:99736ms step_avg:366.68ms step:283/6000 train_loss:4.5062 train_time:99990ms step_avg:366.26ms step:284/6000 train_loss:4.4054 train_time:100245ms step_avg:365.86ms step:285/6000 train_loss:4.5545 train_time:100500ms step_avg:365.45ms step:286/6000 train_loss:4.5568 train_time:100754ms step_avg:365.05ms step:287/6000 train_loss:4.5765 train_time:101009ms step_avg:364.65ms step:288/6000 train_loss:4.4091 train_time:101263ms step_avg:364.26ms step:289/6000 train_loss:4.5024 train_time:101518ms step_avg:363.87ms step:290/6000 train_loss:4.3563 train_time:101773ms step_avg:363.47ms step:291/6000 train_loss:4.3512 train_time:103100ms step_avg:366.90ms step:292/6000 train_loss:4.4541 train_time:103351ms step_avg:366.49ms step:293/6000 train_loss:4.3527 train_time:103605ms step_avg:366.10ms step:294/6000 train_loss:4.3881 train_time:103859ms step_avg:365.70ms step:295/6000 train_loss:4.4298 train_time:104114ms step_avg:365.31ms step:296/6000 train_loss:4.3016 train_time:104367ms step_avg:364.92ms step:297/6000 train_loss:4.3110 train_time:104623ms step_avg:364.54ms step:298/6000 train_loss:4.3162 train_time:104878ms step_avg:364.16ms step:299/6000 train_loss:4.4236 train_time:105132ms step_avg:363.78ms step:300/6000 train_loss:4.2981 train_time:105388ms step_avg:363.41ms step:301/6000 train_loss:4.4472 train_time:106716ms step_avg:366.72ms step:302/6000 train_loss:4.4425 train_time:106968ms step_avg:366.33ms step:303/6000 
train_loss:4.3758 train_time:107223ms step_avg:365.95ms step:304/6000 train_loss:4.4334 train_time:107477ms step_avg:365.57ms step:305/6000 train_loss:4.4173 train_time:107732ms step_avg:365.19ms step:306/6000 train_loss:4.8787 train_time:107986ms step_avg:364.82ms step:307/6000 train_loss:4.3786 train_time:108241ms step_avg:364.45ms step:308/6000 train_loss:4.2877 train_time:108496ms step_avg:364.08ms step:309/6000 train_loss:4.4621 train_time:108750ms step_avg:363.71ms step:310/6000 train_loss:4.2722 train_time:109006ms step_avg:363.35ms step:311/6000 train_loss:4.5166 train_time:110333ms step_avg:366.55ms step:312/6000 train_loss:4.3835 train_time:110585ms step_avg:366.18ms step:313/6000 train_loss:4.3130 train_time:110840ms step_avg:365.81ms step:314/6000 train_loss:4.4298 train_time:111094ms step_avg:365.44ms step:315/6000 train_loss:4.5203 train_time:111347ms step_avg:365.07ms step:316/6000 train_loss:4.3887 train_time:111603ms step_avg:364.72ms step:317/6000 train_loss:4.2314 train_time:111858ms step_avg:364.36ms step:318/6000 train_loss:4.2997 train_time:112112ms step_avg:364.00ms step:319/6000 train_loss:4.3261 train_time:112368ms step_avg:363.65ms step:320/6000 train_loss:4.2952 train_time:112623ms step_avg:363.30ms step:321/6000 train_loss:4.4009 train_time:113950ms step_avg:366.40ms step:322/6000 train_loss:4.3711 train_time:114204ms step_avg:366.04ms step:323/6000 train_loss:4.3335 train_time:114458ms step_avg:365.68ms step:324/6000 train_loss:4.4147 train_time:114713ms step_avg:365.33ms step:325/6000 train_loss:4.3837 train_time:114968ms step_avg:364.98ms step:326/6000 train_loss:4.4486 train_time:115225ms step_avg:364.64ms step:327/6000 train_loss:4.3003 train_time:115479ms step_avg:364.29ms step:328/6000 train_loss:4.8009 train_time:115734ms step_avg:363.94ms step:329/6000 train_loss:4.4672 train_time:115988ms step_avg:363.60ms step:330/6000 train_loss:4.2288 train_time:116243ms step_avg:363.26ms step:331/6000 train_loss:4.1729 train_time:117570ms 
step_avg:366.26ms step:332/6000 train_loss:4.3868 train_time:117822ms step_avg:365.91ms step:333/6000 train_loss:4.3006 train_time:118076ms step_avg:365.56ms step:334/6000 train_loss:4.2874 train_time:118331ms step_avg:365.22ms step:335/6000 train_loss:4.2487 train_time:118586ms step_avg:364.88ms step:336/6000 train_loss:4.4178 train_time:118840ms step_avg:364.54ms step:337/6000 train_loss:4.3555 train_time:119095ms step_avg:364.21ms step:338/6000 train_loss:4.8406 train_time:119349ms step_avg:363.87ms step:339/6000 train_loss:4.3415 train_time:119604ms step_avg:363.54ms step:340/6000 train_loss:4.2996 train_time:119858ms step_avg:363.21ms step:341/6000 train_loss:4.3204 train_time:121185ms step_avg:366.12ms step:342/6000 train_loss:4.2356 train_time:121438ms step_avg:365.78ms step:343/6000 train_loss:4.2095 train_time:121692ms step_avg:365.44ms step:344/6000 train_loss:4.2680 train_time:121947ms step_avg:365.11ms step:345/6000 train_loss:4.3880 train_time:122201ms step_avg:364.78ms step:346/6000 train_loss:4.2287 train_time:122455ms step_avg:364.45ms step:347/6000 train_loss:4.1697 train_time:122710ms step_avg:364.12ms step:348/6000 train_loss:4.2217 train_time:122964ms step_avg:363.80ms step:349/6000 train_loss:4.2535 train_time:123219ms step_avg:363.48ms step:350/6000 train_loss:4.2080 train_time:123473ms step_avg:363.16ms step:351/6000 train_loss:3.9181 train_time:124800ms step_avg:365.98ms step:352/6000 train_loss:4.1929 train_time:125052ms step_avg:365.65ms step:353/6000 train_loss:4.5241 train_time:125307ms step_avg:365.33ms step:354/6000 train_loss:4.0613 train_time:125561ms step_avg:365.00ms step:355/6000 train_loss:4.3063 train_time:125816ms step_avg:364.68ms step:356/6000 train_loss:4.1894 train_time:126070ms step_avg:364.36ms step:357/6000 train_loss:4.2673 train_time:126325ms step_avg:364.05ms step:358/6000 train_loss:4.2496 train_time:126580ms step_avg:363.74ms step:359/6000 train_loss:4.2286 train_time:126835ms step_avg:363.42ms step:360/6000 
train_loss:4.2643 train_time:127090ms step_avg:363.11ms step:361/6000 train_loss:3.8567 train_time:128416ms step_avg:365.86ms step:362/6000 train_loss:4.4132 train_time:128668ms step_avg:365.53ms step:363/6000 train_loss:4.3039 train_time:128922ms step_avg:365.22ms step:364/6000 train_loss:4.2148 train_time:129177ms step_avg:364.91ms step:365/6000 train_loss:4.1368 train_time:129431ms step_avg:364.60ms step:366/6000 train_loss:4.2921 train_time:129686ms step_avg:364.29ms step:367/6000 train_loss:4.2486 train_time:129942ms step_avg:363.98ms step:368/6000 train_loss:4.2276 train_time:130197ms step_avg:363.68ms step:369/6000 train_loss:4.2151 train_time:130452ms step_avg:363.37ms step:370/6000 train_loss:4.1055 train_time:130707ms step_avg:363.07ms step:371/6000 train_loss:4.2654 train_time:132035ms step_avg:365.75ms step:372/6000 train_loss:4.1486 train_time:132287ms step_avg:365.43ms step:373/6000 train_loss:4.0549 train_time:132542ms step_avg:365.13ms step:374/6000 train_loss:4.2747 train_time:132796ms step_avg:364.82ms step:375/6000 train_loss:4.2032 train_time:133049ms step_avg:364.52ms step:375/6000 val_loss:4.1989 train_time:133084ms step_avg:364.61ms step:376/6000 train_loss:4.1666 train_time:133309ms step_avg:364.23ms step:377/6000 train_loss:4.2298 train_time:133564ms step_avg:363.94ms step:378/6000 train_loss:4.1429 train_time:133954ms step_avg:364.01ms step:379/6000 train_loss:4.2073 train_time:134209ms step_avg:363.71ms step:380/6000 train_loss:4.2467 train_time:134613ms step_avg:363.82ms step:381/6000 train_loss:4.3071 train_time:135940ms step_avg:366.42ms step:382/6000 train_loss:4.2190 train_time:136192ms step_avg:366.11ms step:383/6000 train_loss:4.1987 train_time:136448ms step_avg:365.81ms step:384/6000 train_loss:4.1364 train_time:136703ms step_avg:365.51ms step:385/6000 train_loss:4.2300 train_time:136958ms step_avg:365.22ms step:386/6000 train_loss:4.1377 train_time:137211ms step_avg:364.92ms step:387/6000 train_loss:4.2609 train_time:137468ms 
step_avg:364.64ms step:388/6000 train_loss:4.4547 train_time:137724ms step_avg:364.35ms step:389/6000 train_loss:4.1562 train_time:137976ms step_avg:364.05ms step:390/6000 train_loss:4.1397 train_time:138232ms step_avg:363.77ms step:391/6000 train_loss:4.2455 train_time:139558ms step_avg:366.29ms step:392/6000 train_loss:4.1706 train_time:139810ms step_avg:365.99ms step:393/6000 train_loss:4.2659 train_time:140064ms step_avg:365.70ms step:394/6000 train_loss:4.0920 train_time:140319ms step_avg:365.41ms step:395/6000 train_loss:4.2389 train_time:140575ms step_avg:365.13ms step:396/6000 train_loss:3.9818 train_time:140830ms step_avg:364.84ms step:397/6000 train_loss:4.1777 train_time:141084ms step_avg:364.56ms step:398/6000 train_loss:4.2511 train_time:141339ms step_avg:364.28ms step:399/6000 train_loss:4.2379 train_time:141593ms step_avg:363.99ms step:400/6000 train_loss:4.1373 train_time:141849ms step_avg:363.72ms step:401/6000 train_loss:4.2188 train_time:143176ms step_avg:366.18ms step:402/6000 train_loss:4.2507 train_time:143428ms step_avg:365.89ms step:403/6000 train_loss:4.1995 train_time:143682ms step_avg:365.60ms step:404/6000 train_loss:4.2993 train_time:143936ms step_avg:365.32ms step:405/6000 train_loss:4.0633 train_time:144190ms step_avg:365.04ms step:406/6000 train_loss:4.1316 train_time:144446ms step_avg:364.76ms step:407/6000 train_loss:4.4174 train_time:144700ms step_avg:364.48ms step:408/6000 train_loss:4.1435 train_time:144954ms step_avg:364.21ms step:409/6000 train_loss:4.1644 train_time:145209ms step_avg:363.93ms step:410/6000 train_loss:4.2099 train_time:145464ms step_avg:363.66ms step:411/6000 train_loss:4.0885 train_time:146789ms step_avg:366.06ms step:412/6000 train_loss:4.1112 train_time:147043ms step_avg:365.78ms step:413/6000 train_loss:4.5258 train_time:147297ms step_avg:365.50ms step:414/6000 train_loss:3.9728 train_time:147551ms step_avg:365.23ms step:415/6000 train_loss:4.3576 train_time:147806ms step_avg:364.95ms step:416/6000 
train_loss:4.1033 train_time:148061ms step_avg:364.68ms step:417/6000 train_loss:4.1016 train_time:148315ms step_avg:364.41ms step:418/6000 train_loss:4.2987 train_time:148570ms step_avg:364.14ms step:419/6000 train_loss:4.0250 train_time:148825ms step_avg:363.88ms step:420/6000 train_loss:4.1363 train_time:149080ms step_avg:363.61ms step:421/6000 train_loss:4.0841 train_time:150407ms step_avg:365.95ms step:422/6000 train_loss:3.9811 train_time:150660ms step_avg:365.68ms step:423/6000 train_loss:4.1104 train_time:150914ms step_avg:365.41ms step:424/6000 train_loss:4.2033 train_time:151168ms step_avg:365.14ms step:425/6000 train_loss:3.9664 train_time:151423ms step_avg:364.87ms step:426/6000 train_loss:4.1557 train_time:151678ms step_avg:364.61ms step:427/6000 train_loss:4.0255 train_time:151933ms step_avg:364.35ms step:428/6000 train_loss:4.2407 train_time:152188ms step_avg:364.09ms step:429/6000 train_loss:4.1578 train_time:152444ms step_avg:363.83ms step:430/6000 train_loss:4.0903 train_time:152699ms step_avg:363.57ms step:431/6000 train_loss:4.0587 train_time:154025ms step_avg:365.85ms step:432/6000 train_loss:3.9735 train_time:154276ms step_avg:365.58ms step:433/6000 train_loss:4.0887 train_time:154530ms step_avg:365.32ms step:434/6000 train_loss:4.1624 train_time:154785ms step_avg:365.06ms step:435/6000 train_loss:4.0921 train_time:155041ms step_avg:364.80ms step:436/6000 train_loss:4.1460 train_time:155296ms step_avg:364.54ms step:437/6000 train_loss:4.1505 train_time:155551ms step_avg:364.29ms step:438/6000 train_loss:4.0485 train_time:155806ms step_avg:364.03ms step:439/6000 train_loss:4.0559 train_time:156062ms step_avg:363.78ms step:440/6000 train_loss:4.0313 train_time:156316ms step_avg:363.53ms step:441/6000 train_loss:4.2033 train_time:157643ms step_avg:365.76ms step:442/6000 train_loss:4.0981 train_time:157895ms step_avg:365.50ms step:443/6000 train_loss:4.0862 train_time:158151ms step_avg:365.24ms step:444/6000 train_loss:3.9713 train_time:158405ms 
step_avg:364.99ms step:445/6000 train_loss:4.2376 train_time:158660ms step_avg:364.74ms step:446/6000 train_loss:4.1672 train_time:158914ms step_avg:364.48ms step:447/6000 train_loss:4.1700 train_time:159169ms step_avg:364.23ms step:448/6000 train_loss:4.0773 train_time:159424ms step_avg:363.98ms step:449/6000 train_loss:4.1753 train_time:159679ms step_avg:363.73ms step:450/6000 train_loss:3.9962 train_time:159934ms step_avg:363.49ms step:451/6000 train_loss:4.0501 train_time:161260ms step_avg:365.67ms step:452/6000 train_loss:3.9144 train_time:161511ms step_avg:365.41ms step:453/6000 train_loss:4.0238 train_time:161765ms step_avg:365.16ms step:454/6000 train_loss:3.9999 train_time:162020ms step_avg:364.91ms step:455/6000 train_loss:3.9664 train_time:162275ms step_avg:364.66ms step:456/6000 train_loss:4.1702 train_time:162529ms step_avg:364.42ms step:457/6000 train_loss:4.0439 train_time:162784ms step_avg:364.17ms step:458/6000 train_loss:4.1145 train_time:163037ms step_avg:363.92ms step:459/6000 train_loss:4.1654 train_time:163292ms step_avg:363.68ms step:460/6000 train_loss:3.9573 train_time:163547ms step_avg:363.44ms step:461/6000 train_loss:4.1329 train_time:164874ms step_avg:365.57ms step:462/6000 train_loss:4.0258 train_time:165128ms step_avg:365.33ms step:463/6000 train_loss:4.0343 train_time:165382ms step_avg:365.08ms step:464/6000 train_loss:4.1033 train_time:165636ms step_avg:364.84ms step:465/6000 train_loss:4.0332 train_time:165891ms step_avg:364.59ms step:466/6000 train_loss:4.0427 train_time:166145ms step_avg:364.35ms step:467/6000 train_loss:4.1451 train_time:166400ms step_avg:364.11ms step:468/6000 train_loss:4.1474 train_time:166655ms step_avg:363.88ms step:469/6000 train_loss:4.1259 train_time:166909ms step_avg:363.64ms step:470/6000 train_loss:4.0175 train_time:167164ms step_avg:363.40ms step:471/6000 train_loss:4.1010 train_time:168491ms step_avg:365.49ms step:472/6000 train_loss:4.1529 train_time:168743ms step_avg:365.24ms step:473/6000 
train_loss:4.0790 train_time:168998ms step_avg:365.01ms step:474/6000 train_loss:4.0387 train_time:169253ms step_avg:364.77ms step:475/6000 train_loss:3.9018 train_time:169507ms step_avg:364.53ms step:476/6000 train_loss:4.3334 train_time:169762ms step_avg:364.30ms step:477/6000 train_loss:4.0895 train_time:170017ms step_avg:364.06ms step:478/6000 train_loss:3.9027 train_time:170272ms step_avg:363.83ms step:479/6000 train_loss:4.1249 train_time:170527ms step_avg:363.60ms step:480/6000 train_loss:4.0800 train_time:170782ms step_avg:363.37ms step:481/6000 train_loss:4.2284 train_time:172108ms step_avg:365.41ms step:482/6000 train_loss:4.0416 train_time:172362ms step_avg:365.17ms step:483/6000 train_loss:3.8408 train_time:172616ms step_avg:364.94ms step:484/6000 train_loss:4.1237 train_time:172871ms step_avg:364.71ms step:485/6000 train_loss:3.9848 train_time:173126ms step_avg:364.48ms step:486/6000 train_loss:3.9941 train_time:173381ms step_avg:364.25ms step:487/6000 train_loss:3.9194 train_time:173637ms step_avg:364.02ms step:488/6000 train_loss:3.9773 train_time:173891ms step_avg:363.79ms step:489/6000 train_loss:4.1781 train_time:174149ms step_avg:363.57ms step:490/6000 train_loss:4.0285 train_time:174402ms step_avg:363.34ms step:491/6000 train_loss:3.9159 train_time:175727ms step_avg:365.34ms step:492/6000 train_loss:3.9266 train_time:175982ms step_avg:365.11ms step:493/6000 train_loss:4.0471 train_time:176237ms step_avg:364.88ms step:494/6000 train_loss:3.8868 train_time:176491ms step_avg:364.65ms step:495/6000 train_loss:4.0314 train_time:176746ms step_avg:364.42ms step:496/6000 train_loss:3.9631 train_time:176999ms step_avg:364.20ms step:497/6000 train_loss:3.8636 train_time:177255ms step_avg:363.97ms step:498/6000 train_loss:4.0436 train_time:177508ms step_avg:363.75ms step:499/6000 train_loss:4.1193 train_time:177763ms step_avg:363.52ms step:500/6000 train_loss:4.1525 train_time:178017ms step_avg:363.30ms step:500/6000 val_loss:4.0219 train_time:178052ms 
step_avg:363.37ms step:501/6000 train_loss:4.0519 train_time:179348ms step_avg:365.27ms step:502/6000 train_loss:4.1058 train_time:179603ms step_avg:365.05ms step:503/6000 train_loss:4.0516 train_time:179857ms step_avg:364.82ms step:504/6000 train_loss:4.0881 train_time:180112ms step_avg:364.60ms step:505/6000 train_loss:4.0485 train_time:180367ms step_avg:364.38ms step:506/6000 train_loss:4.1279 train_time:180621ms step_avg:364.16ms step:507/6000 train_loss:3.9394 train_time:180876ms step_avg:363.94ms step:508/6000 train_loss:4.0724 train_time:181131ms step_avg:363.72ms step:509/6000 train_loss:4.1494 train_time:181387ms step_avg:363.50ms step:510/6000 train_loss:4.0820 train_time:181642ms step_avg:363.28ms step:511/6000 train_loss:3.8921 train_time:182968ms step_avg:365.21ms step:512/6000 train_loss:4.0902 train_time:183222ms step_avg:364.98ms step:513/6000 train_loss:4.0298 train_time:183477ms step_avg:364.77ms step:514/6000 train_loss:3.9884 train_time:183732ms step_avg:364.55ms step:515/6000 train_loss:4.1199 train_time:183987ms step_avg:364.33ms step:516/6000 train_loss:4.0514 train_time:184241ms step_avg:364.11ms step:517/6000 train_loss:4.3948 train_time:184497ms step_avg:363.90ms step:518/6000 train_loss:3.9883 train_time:184752ms step_avg:363.68ms step:519/6000 train_loss:4.1055 train_time:185009ms step_avg:363.48ms step:520/6000 train_loss:4.0128 train_time:185264ms step_avg:363.26ms step:521/6000 train_loss:4.0003 train_time:186589ms step_avg:365.15ms step:522/6000 train_loss:3.9497 train_time:186842ms step_avg:364.93ms step:523/6000 train_loss:3.9645 train_time:187097ms step_avg:364.71ms step:524/6000 train_loss:4.5850 train_time:187353ms step_avg:364.50ms step:525/6000 train_loss:4.0527 train_time:187608ms step_avg:364.29ms step:526/6000 train_loss:3.9883 train_time:187863ms step_avg:364.08ms step:527/6000 train_loss:3.9948 train_time:188117ms step_avg:363.86ms step:528/6000 train_loss:3.9518 train_time:188372ms step_avg:363.65ms step:529/6000 
train_loss:3.9274 train_time:188628ms step_avg:363.45ms step:530/6000 train_loss:4.1467 train_time:188883ms step_avg:363.24ms step:531/6000 train_loss:3.9464 train_time:190209ms step_avg:365.08ms step:532/6000 train_loss:4.2276 train_time:190462ms step_avg:364.87ms step:533/6000 train_loss:4.0366 train_time:190715ms step_avg:364.66ms step:534/6000 train_loss:3.9676 train_time:190970ms step_avg:364.45ms step:535/6000 train_loss:3.9831 train_time:191225ms step_avg:364.24ms step:536/6000 train_loss:3.9162 train_time:191480ms step_avg:364.03ms step:537/6000 train_loss:4.0422 train_time:191736ms step_avg:363.82ms step:538/6000 train_loss:4.0397 train_time:191992ms step_avg:363.62ms step:539/6000 train_loss:3.9309 train_time:192245ms step_avg:363.41ms step:540/6000 train_loss:4.4370 train_time:192501ms step_avg:363.21ms step:541/6000 train_loss:3.9700 train_time:193827ms step_avg:365.02ms step:542/6000 train_loss:4.0768 train_time:194079ms step_avg:364.81ms step:543/6000 train_loss:3.9121 train_time:194334ms step_avg:364.60ms step:544/6000 train_loss:3.8824 train_time:194588ms step_avg:364.40ms step:545/6000 train_loss:3.9790 train_time:194843ms step_avg:364.19ms step:546/6000 train_loss:3.8964 train_time:195098ms step_avg:363.99ms step:547/6000 train_loss:3.9470 train_time:195352ms step_avg:363.78ms step:548/6000 train_loss:3.9599 train_time:195607ms step_avg:363.58ms step:549/6000 train_loss:3.9383 train_time:195862ms step_avg:363.38ms step:550/6000 train_loss:4.0245 train_time:196116ms step_avg:363.18ms step:551/6000 train_loss:3.9044 train_time:197443ms step_avg:364.96ms step:552/6000 train_loss:3.9195 train_time:197695ms step_avg:364.75ms step:553/6000 train_loss:4.2528 train_time:197950ms step_avg:364.55ms step:554/6000 train_loss:4.0528 train_time:198204ms step_avg:364.35ms step:555/6000 train_loss:4.0068 train_time:198459ms step_avg:364.15ms step:556/6000 train_loss:3.9699 train_time:198713ms step_avg:363.94ms step:557/6000 train_loss:3.9882 train_time:198968ms 
step_avg:363.74ms step:558/6000 train_loss:3.6497 train_time:199223ms step_avg:363.55ms step:559/6000 train_loss:3.8963 train_time:199477ms step_avg:363.35ms step:560/6000 train_loss:3.9471 train_time:199732ms step_avg:363.15ms step:561/6000 train_loss:3.9949 train_time:201060ms step_avg:364.90ms step:562/6000 train_loss:3.9042 train_time:201310ms step_avg:364.69ms step:563/6000 train_loss:3.8513 train_time:201565ms step_avg:364.49ms step:564/6000 train_loss:4.0485 train_time:201820ms step_avg:364.30ms step:565/6000 train_loss:3.8664 train_time:202074ms step_avg:364.10ms step:566/6000 train_loss:3.9883 train_time:202328ms step_avg:363.90ms step:567/6000 train_loss:3.9328 train_time:202723ms step_avg:363.95ms step:568/6000 train_loss:3.8905 train_time:202977ms step_avg:363.76ms step:569/6000 train_loss:3.9806 train_time:203231ms step_avg:363.56ms step:570/6000 train_loss:3.9505 train_time:203636ms step_avg:363.64ms step:571/6000 train_loss:3.9737 train_time:204963ms step_avg:365.35ms step:572/6000 train_loss:4.0661 train_time:205214ms step_avg:365.15ms step:573/6000 train_loss:4.0119 train_time:205468ms step_avg:364.95ms step:574/6000 train_loss:4.0145 train_time:205722ms step_avg:364.76ms step:575/6000 train_loss:4.0678 train_time:205977ms step_avg:364.56ms step:576/6000 train_loss:4.0304 train_time:206232ms step_avg:364.37ms step:577/6000 train_loss:4.0385 train_time:206487ms step_avg:364.17ms step:578/6000 train_loss:3.9830 train_time:206742ms step_avg:363.98ms step:579/6000 train_loss:3.9600 train_time:206996ms step_avg:363.79ms step:580/6000 train_loss:3.9479 train_time:207251ms step_avg:363.60ms step:581/6000 train_loss:3.8938 train_time:208576ms step_avg:365.28ms step:582/6000 train_loss:3.9230 train_time:208830ms step_avg:365.09ms step:583/6000 train_loss:4.1457 train_time:209084ms step_avg:364.89ms step:584/6000 train_loss:3.9158 train_time:209338ms step_avg:364.70ms step:585/6000 train_loss:3.8753 train_time:209593ms step_avg:364.51ms step:586/6000 
train_loss:4.0615 train_time:209848ms step_avg:364.32ms step:587/6000 train_loss:3.8205 train_time:210103ms step_avg:364.13ms step:588/6000 train_loss:3.9575 train_time:210358ms step_avg:363.94ms step:589/6000 train_loss:3.9464 train_time:210613ms step_avg:363.75ms step:590/6000 train_loss:4.2954 train_time:210868ms step_avg:363.57ms step:591/6000 train_loss:4.0678 train_time:212194ms step_avg:365.22ms step:592/6000 train_loss:3.8126 train_time:212457ms step_avg:365.05ms step:593/6000 train_loss:3.8234 train_time:212710ms step_avg:364.86ms step:594/6000 train_loss:3.8217 train_time:212965ms step_avg:364.67ms step:595/6000 train_loss:3.8609 train_time:213220ms step_avg:364.48ms step:596/6000 train_loss:4.2033 train_time:213474ms step_avg:364.29ms step:597/6000 train_loss:3.9397 train_time:213729ms step_avg:364.10ms step:598/6000 train_loss:3.8700 train_time:213984ms step_avg:363.92ms step:599/6000 train_loss:3.9519 train_time:214238ms step_avg:363.73ms step:600/6000 train_loss:3.7655 train_time:214493ms step_avg:363.55ms step:601/6000 train_loss:3.8887 train_time:215819ms step_avg:365.18ms step:602/6000 train_loss:3.9171 train_time:216071ms step_avg:364.99ms step:603/6000 train_loss:3.9417 train_time:216325ms step_avg:364.80ms step:604/6000 train_loss:4.0655 train_time:216580ms step_avg:364.61ms step:605/6000 train_loss:3.9277 train_time:216833ms step_avg:364.43ms step:606/6000 train_loss:3.9075 train_time:217088ms step_avg:364.24ms step:607/6000 train_loss:3.8526 train_time:217343ms step_avg:364.06ms step:608/6000 train_loss:4.0975 train_time:217597ms step_avg:363.87ms step:609/6000 train_loss:3.9458 train_time:217852ms step_avg:363.69ms step:610/6000 train_loss:3.9016 train_time:218109ms step_avg:363.51ms step:611/6000 train_loss:4.0100 train_time:219435ms step_avg:365.12ms step:612/6000 train_loss:3.9175 train_time:219686ms step_avg:364.93ms step:613/6000 train_loss:3.8892 train_time:219940ms step_avg:364.74ms step:614/6000 train_loss:4.0500 train_time:220196ms 
step_avg:364.56ms step:615/6000 train_loss:4.0258 train_time:220450ms step_avg:364.38ms step:616/6000 train_loss:3.9744 train_time:220706ms step_avg:364.20ms step:617/6000 train_loss:3.9051 train_time:220960ms step_avg:364.02ms step:618/6000 train_loss:3.8560 train_time:221215ms step_avg:363.84ms step:619/6000 train_loss:3.9620 train_time:221471ms step_avg:363.66ms step:620/6000 train_loss:3.8755 train_time:221726ms step_avg:363.49ms step:621/6000 train_loss:3.8812 train_time:223053ms step_avg:365.06ms step:622/6000 train_loss:4.1821 train_time:223308ms step_avg:364.88ms step:623/6000 train_loss:3.8779 train_time:223563ms step_avg:364.70ms step:624/6000 train_loss:3.9110 train_time:223817ms step_avg:364.52ms step:625/6000 train_loss:3.9839 train_time:224072ms step_avg:364.35ms step:625/6000 val_loss:3.9130 train_time:224107ms step_avg:364.40ms step:626/6000 train_loss:4.0020 train_time:224333ms step_avg:364.18ms step:627/6000 train_loss:4.0319 train_time:224588ms step_avg:364.00ms step:628/6000 train_loss:4.0100 train_time:224844ms step_avg:363.82ms step:629/6000 train_loss:4.0657 train_time:225098ms step_avg:363.65ms step:630/6000 train_loss:3.8730 train_time:225353ms step_avg:363.47ms step:631/6000 train_loss:4.0032 train_time:226680ms step_avg:365.02ms step:632/6000 train_loss:4.0475 train_time:226931ms step_avg:364.84ms step:633/6000 train_loss:3.9429 train_time:227186ms step_avg:364.66ms step:634/6000 train_loss:3.8605 train_time:227442ms step_avg:364.49ms step:635/6000 train_loss:3.9670 train_time:227696ms step_avg:364.31ms step:636/6000 train_loss:4.2232 train_time:227950ms step_avg:364.14ms step:637/6000 train_loss:3.8098 train_time:228206ms step_avg:363.96ms step:638/6000 train_loss:3.6390 train_time:228460ms step_avg:363.79ms step:639/6000 train_loss:3.8609 train_time:228716ms step_avg:363.62ms step:640/6000 train_loss:3.8949 train_time:228971ms step_avg:363.45ms step:641/6000 train_loss:3.8615 train_time:230300ms step_avg:364.98ms step:642/6000 
train_loss:3.8625 train_time:230551ms step_avg:364.80ms step:643/6000 train_loss:3.8936 train_time:230805ms step_avg:364.62ms step:644/6000 train_loss:3.9250 train_time:231061ms step_avg:364.45ms step:645/6000 train_loss:3.8411 train_time:231316ms step_avg:364.28ms step:646/6000 train_loss:4.0569 train_time:231569ms step_avg:364.10ms step:647/6000 train_loss:3.9642 train_time:231825ms step_avg:363.93ms step:648/6000 train_loss:3.9523 train_time:232080ms step_avg:363.76ms step:649/6000 train_loss:3.9818 train_time:232335ms step_avg:363.59ms step:650/6000 train_loss:4.0399 train_time:232589ms step_avg:363.42ms step:651/6000 train_loss:3.8987 train_time:233916ms step_avg:364.92ms step:652/6000 train_loss:4.0443 train_time:234168ms step_avg:364.75ms step:653/6000 train_loss:3.8670 train_time:234423ms step_avg:364.58ms step:654/6000 train_loss:3.9438 train_time:234677ms step_avg:364.40ms step:655/6000 train_loss:3.7100 train_time:234931ms step_avg:364.23ms step:656/6000 train_loss:3.8480 train_time:235186ms step_avg:364.07ms step:657/6000 train_loss:3.8558 train_time:235442ms step_avg:363.90ms step:658/6000 train_loss:3.7906 train_time:235697ms step_avg:363.73ms step:659/6000 train_loss:3.9703 train_time:235951ms step_avg:363.56ms step:660/6000 train_loss:3.8747 train_time:236207ms step_avg:363.39ms step:661/6000 train_loss:3.9575 train_time:237535ms step_avg:364.88ms step:662/6000 train_loss:4.0306 train_time:237785ms step_avg:364.70ms step:663/6000 train_loss:3.9416 train_time:238040ms step_avg:364.53ms step:664/6000 train_loss:3.8166 train_time:238295ms step_avg:364.36ms step:665/6000 train_loss:3.9079 train_time:238548ms step_avg:364.20ms step:666/6000 train_loss:3.7805 train_time:238804ms step_avg:364.03ms step:667/6000 train_loss:4.0673 train_time:239058ms step_avg:363.86ms step:668/6000 train_loss:3.9064 train_time:239314ms step_avg:363.70ms step:669/6000 train_loss:3.9041 train_time:239569ms step_avg:363.53ms step:670/6000 train_loss:3.7620 train_time:239824ms 
step_avg:363.37ms step:671/6000 train_loss:3.8739 train_time:241150ms step_avg:364.83ms step:672/6000 train_loss:3.8345 train_time:241404ms step_avg:364.66ms step:673/6000 train_loss:3.8557 train_time:241658ms step_avg:364.49ms step:674/6000 train_loss:4.1277 train_time:241913ms step_avg:364.33ms step:675/6000 train_loss:3.9217 train_time:242168ms step_avg:364.16ms step:676/6000 train_loss:3.9901 train_time:242423ms step_avg:364.00ms step:677/6000 train_loss:3.7654 train_time:242679ms step_avg:363.84ms step:678/6000 train_loss:3.8745 train_time:242935ms step_avg:363.68ms step:679/6000 train_loss:3.8121 train_time:243189ms step_avg:363.51ms step:680/6000 train_loss:3.9636 train_time:243444ms step_avg:363.35ms step:681/6000 train_loss:3.8652 train_time:244771ms step_avg:364.79ms step:682/6000 train_loss:3.8865 train_time:245023ms step_avg:364.62ms step:683/6000 train_loss:3.9679 train_time:245277ms step_avg:364.45ms step:684/6000 train_loss:4.0138 train_time:245532ms step_avg:364.29ms step:685/6000 train_loss:3.9016 train_time:245786ms step_avg:364.13ms step:686/6000 train_loss:3.9838 train_time:246043ms step_avg:363.97ms step:687/6000 train_loss:3.9076 train_time:246298ms step_avg:363.81ms step:688/6000 train_loss:3.9593 train_time:246553ms step_avg:363.65ms step:689/6000 train_loss:3.5898 train_time:246808ms step_avg:363.49ms step:690/6000 train_loss:3.6947 train_time:247062ms step_avg:363.33ms step:691/6000 train_loss:3.8368 train_time:248390ms step_avg:364.74ms step:692/6000 train_loss:3.7168 train_time:248642ms step_avg:364.58ms step:693/6000 train_loss:3.9253 train_time:248897ms step_avg:364.42ms step:694/6000 train_loss:3.9372 train_time:249150ms step_avg:364.26ms step:695/6000 train_loss:3.8223 train_time:249405ms step_avg:364.09ms step:696/6000 train_loss:3.8091 train_time:249659ms step_avg:363.93ms step:697/6000 train_loss:4.1331 train_time:249915ms step_avg:363.78ms step:698/6000 train_loss:3.8821 train_time:250168ms step_avg:363.62ms step:699/6000 
train_loss:3.9176 train_time:250423ms step_avg:363.46ms step:700/6000 train_loss:4.0952 train_time:250678ms step_avg:363.30ms step:701/6000 train_loss:3.8529 train_time:252006ms step_avg:364.70ms step:702/6000 train_loss:3.8027 train_time:252256ms step_avg:364.53ms step:703/6000 train_loss:3.7996 train_time:252511ms step_avg:364.37ms step:704/6000 train_loss:3.7458 train_time:252765ms step_avg:364.21ms step:705/6000 train_loss:3.8369 train_time:253019ms step_avg:364.06ms step:706/6000 train_loss:3.8381 train_time:253273ms step_avg:363.90ms step:707/6000 train_loss:3.8492 train_time:253529ms step_avg:363.74ms step:708/6000 train_loss:3.9237 train_time:253786ms step_avg:363.59ms step:709/6000 train_loss:3.8665 train_time:254043ms step_avg:363.44ms step:710/6000 train_loss:3.8546 train_time:254297ms step_avg:363.28ms step:711/6000 train_loss:3.8258 train_time:255624ms step_avg:364.66ms step:712/6000 train_loss:3.8654 train_time:255875ms step_avg:364.49ms step:713/6000 train_loss:3.9255 train_time:256130ms step_avg:364.34ms step:714/6000 train_loss:3.9386 train_time:256384ms step_avg:364.18ms step:715/6000 train_loss:3.8549 train_time:256638ms step_avg:364.03ms step:716/6000 train_loss:3.8507 train_time:256893ms step_avg:363.87ms step:717/6000 train_loss:3.8613 train_time:257147ms step_avg:363.72ms step:718/6000 train_loss:4.0095 train_time:257403ms step_avg:363.56ms step:719/6000 train_loss:3.8726 train_time:257658ms step_avg:363.41ms step:720/6000 train_loss:3.9423 train_time:257913ms step_avg:363.26ms step:721/6000 train_loss:4.0922 train_time:259238ms step_avg:364.61ms step:722/6000 train_loss:3.7372 train_time:259490ms step_avg:364.45ms step:723/6000 train_loss:3.9973 train_time:259743ms step_avg:364.30ms step:724/6000 train_loss:4.0523 train_time:259998ms step_avg:364.14ms step:725/6000 train_loss:3.8420 train_time:260252ms step_avg:363.99ms step:726/6000 train_loss:3.9155 train_time:260508ms step_avg:363.84ms step:727/6000 train_loss:3.8196 train_time:260762ms 
step_avg:363.69ms step:728/6000 train_loss:3.8250 train_time:261016ms step_avg:363.53ms step:729/6000 train_loss:4.0031 train_time:261271ms step_avg:363.38ms step:730/6000 train_loss:3.9603 train_time:261526ms step_avg:363.23ms step:731/6000 train_loss:3.9622 train_time:262851ms step_avg:364.56ms step:732/6000 train_loss:3.8368 train_time:263105ms step_avg:364.41ms step:733/6000 train_loss:3.8617 train_time:263359ms step_avg:364.26ms step:734/6000 train_loss:4.1064 train_time:263614ms step_avg:364.11ms step:735/6000 train_loss:3.8359 train_time:263868ms step_avg:363.96ms step:736/6000 train_loss:3.8952 train_time:264123ms step_avg:363.81ms step:737/6000 train_loss:4.0174 train_time:264378ms step_avg:363.66ms step:738/6000 train_loss:3.9284 train_time:264634ms step_avg:363.51ms step:739/6000 train_loss:3.8726 train_time:264888ms step_avg:363.36ms step:740/6000 train_loss:3.7715 train_time:265143ms step_avg:363.21ms step:741/6000 train_loss:4.4120 train_time:266470ms step_avg:364.53ms step:742/6000 train_loss:3.7681 train_time:266722ms step_avg:364.37ms step:743/6000 train_loss:3.8648 train_time:266977ms step_avg:364.23ms step:744/6000 train_loss:3.8527 train_time:267231ms step_avg:364.08ms step:745/6000 train_loss:3.9064 train_time:267486ms step_avg:363.93ms step:746/6000 train_loss:3.8873 train_time:267742ms step_avg:363.78ms step:747/6000 train_loss:3.8750 train_time:267996ms step_avg:363.63ms step:748/6000 train_loss:3.8949 train_time:268251ms step_avg:363.48ms step:749/6000 train_loss:3.8354 train_time:268505ms step_avg:363.34ms step:750/6000 train_loss:3.8365 train_time:268760ms step_avg:363.19ms step:750/6000 val_loss:3.8467 train_time:268794ms step_avg:363.24ms step:751/6000 train_loss:3.8731 train_time:270092ms step_avg:364.50ms step:752/6000 train_loss:3.8343 train_time:270343ms step_avg:364.34ms step:753/6000 train_loss:3.8662 train_time:270598ms step_avg:364.20ms step:754/6000 train_loss:3.8816 train_time:270854ms step_avg:364.05ms step:755/6000 
train_loss:3.8526 train_time:271108ms step_avg:363.90ms step:756/6000 train_loss:3.9275 train_time:271507ms step_avg:363.95ms step:757/6000 train_loss:3.7652 train_time:271762ms step_avg:363.80ms step:758/6000 train_loss:3.9982 train_time:272017ms step_avg:363.66ms step:759/6000 train_loss:3.9275 train_time:272274ms step_avg:363.52ms step:760/6000 train_loss:3.8451 train_time:272686ms step_avg:363.58ms step:761/6000 train_loss:3.9560 train_time:274012ms step_avg:364.86ms step:762/6000 train_loss:3.6721 train_time:274265ms step_avg:364.71ms step:763/6000 train_loss:3.8245 train_time:274519ms step_avg:364.57ms step:764/6000 train_loss:3.9394 train_time:274773ms step_avg:364.42ms step:765/6000 train_loss:3.5933 train_time:275028ms step_avg:364.28ms step:766/6000 train_loss:4.0104 train_time:275282ms step_avg:364.13ms step:767/6000 train_loss:3.8763 train_time:275538ms step_avg:363.99ms step:768/6000 train_loss:3.8189 train_time:275794ms step_avg:363.84ms step:769/6000 train_loss:3.8477 train_time:276050ms step_avg:363.70ms step:770/6000 train_loss:3.8666 train_time:276304ms step_avg:363.56ms step:771/6000 train_loss:3.9309 train_time:277630ms step_avg:364.82ms step:772/6000 train_loss:4.1468 train_time:277884ms step_avg:364.68ms step:773/6000 train_loss:3.7242 train_time:278138ms step_avg:364.53ms step:774/6000 train_loss:3.9286 train_time:278394ms step_avg:364.39ms step:775/6000 train_loss:3.9043 train_time:278649ms step_avg:364.25ms step:776/6000 train_loss:3.8740 train_time:278904ms step_avg:364.10ms step:777/6000 train_loss:3.6738 train_time:279160ms step_avg:363.96ms step:778/6000 train_loss:3.6777 train_time:279414ms step_avg:363.82ms step:779/6000 train_loss:3.7442 train_time:279670ms step_avg:363.68ms step:780/6000 train_loss:3.8336 train_time:279925ms step_avg:363.54ms step:781/6000 train_loss:3.8715 train_time:281251ms step_avg:364.79ms step:782/6000 train_loss:3.9282 train_time:281504ms step_avg:364.64ms step:783/6000 train_loss:3.8339 train_time:281759ms 
step_avg:364.50ms step:784/6000 train_loss:3.8415 train_time:282014ms step_avg:364.36ms step:785/6000 train_loss:3.8343 train_time:282269ms step_avg:364.22ms step:786/6000 train_loss:3.8158 train_time:282524ms step_avg:364.08ms step:787/6000 train_loss:3.7255 train_time:282778ms step_avg:363.94ms step:788/6000 train_loss:3.9608 train_time:283033ms step_avg:363.80ms step:789/6000 train_loss:3.7630 train_time:283288ms step_avg:363.66ms step:790/6000 train_loss:3.8315 train_time:283544ms step_avg:363.52ms step:791/6000 train_loss:3.8966 train_time:284869ms step_avg:364.75ms step:792/6000 train_loss:4.0257 train_time:285122ms step_avg:364.61ms step:793/6000 train_loss:4.0357 train_time:285377ms step_avg:364.47ms step:794/6000 train_loss:3.7496 train_time:285633ms step_avg:364.33ms step:795/6000 train_loss:3.8643 train_time:285890ms step_avg:364.19ms step:796/6000 train_loss:3.9095 train_time:286145ms step_avg:364.05ms step:797/6000 train_loss:4.0338 train_time:286399ms step_avg:363.91ms step:798/6000 train_loss:3.7793 train_time:286654ms step_avg:363.77ms step:799/6000 train_loss:3.9250 train_time:286908ms step_avg:363.64ms step:800/6000 train_loss:3.8211 train_time:287164ms step_avg:363.50ms step:801/6000 train_loss:3.8093 train_time:288490ms step_avg:364.72ms step:802/6000 train_loss:3.9070 train_time:288742ms step_avg:364.57ms step:803/6000 train_loss:3.7557 train_time:288996ms step_avg:364.43ms step:804/6000 train_loss:3.7857 train_time:289250ms step_avg:364.29ms step:805/6000 train_loss:3.9017 train_time:289504ms step_avg:364.16ms step:806/6000 train_loss:3.8015 train_time:289758ms step_avg:364.02ms step:807/6000 train_loss:3.8099 train_time:290014ms step_avg:363.88ms step:808/6000 train_loss:3.9155 train_time:290269ms step_avg:363.75ms step:809/6000 train_loss:3.8306 train_time:290523ms step_avg:363.61ms step:810/6000 train_loss:3.7477 train_time:290778ms step_avg:363.47ms step:811/6000 train_loss:3.8353 train_time:292105ms step_avg:364.68ms step:812/6000 
train_loss:3.8698 train_time:292357ms step_avg:364.53ms step:813/6000 train_loss:3.8571 train_time:292610ms step_avg:364.40ms step:814/6000 train_loss:3.8930 train_time:292865ms step_avg:364.26ms step:815/6000 train_loss:3.8432 train_time:293120ms step_avg:364.12ms step:816/6000 train_loss:3.8231 train_time:293375ms step_avg:363.99ms step:817/6000 train_loss:3.9230 train_time:293630ms step_avg:363.85ms step:818/6000 train_loss:4.0248 train_time:293884ms step_avg:363.72ms step:819/6000 train_loss:3.7921 train_time:294140ms step_avg:363.58ms step:820/6000 train_loss:3.9961 train_time:294394ms step_avg:363.45ms step:821/6000 train_loss:3.7727 train_time:295722ms step_avg:364.64ms step:822/6000 train_loss:3.8091 train_time:295973ms step_avg:364.50ms step:823/6000 train_loss:3.9284 train_time:296228ms step_avg:364.36ms step:824/6000 train_loss:3.8503 train_time:296482ms step_avg:364.23ms step:825/6000 train_loss:3.7733 train_time:296737ms step_avg:364.09ms step:826/6000 train_loss:3.8733 train_time:296992ms step_avg:363.96ms step:827/6000 train_loss:3.7676 train_time:297248ms step_avg:363.83ms step:828/6000 train_loss:3.9932 train_time:297503ms step_avg:363.70ms step:829/6000 train_loss:3.8847 train_time:297758ms step_avg:363.56ms step:830/6000 train_loss:3.9493 train_time:298012ms step_avg:363.43ms step:831/6000 train_loss:3.7973 train_time:299338ms step_avg:364.60ms step:832/6000 train_loss:3.8488 train_time:299589ms step_avg:364.46ms step:833/6000 train_loss:3.7885 train_time:299844ms step_avg:364.33ms step:834/6000 train_loss:3.9077 train_time:300098ms step_avg:364.20ms step:835/6000 train_loss:3.7564 train_time:300353ms step_avg:364.06ms step:836/6000 train_loss:3.7251 train_time:300608ms step_avg:363.93ms step:837/6000 train_loss:3.9909 train_time:300864ms step_avg:363.80ms step:838/6000 train_loss:3.6875 train_time:301118ms step_avg:363.67ms step:839/6000 train_loss:3.8528 train_time:301373ms step_avg:363.54ms step:840/6000 train_loss:3.6984 train_time:301628ms 
step_avg:363.41ms step:841/6000 train_loss:3.7377 train_time:302955ms step_avg:364.57ms step:842/6000 train_loss:3.8307 train_time:303207ms step_avg:364.43ms step:843/6000 train_loss:3.8398 train_time:303462ms step_avg:364.30ms step:844/6000 train_loss:3.8407 train_time:303716ms step_avg:364.17ms step:845/6000 train_loss:3.6864 train_time:303970ms step_avg:364.04ms step:846/6000 train_loss:3.9263 train_time:304226ms step_avg:363.91ms step:847/6000 train_loss:3.7945 train_time:304480ms step_avg:363.78ms step:848/6000 train_loss:3.7576 train_time:304735ms step_avg:363.65ms step:849/6000 train_loss:3.8865 train_time:304989ms step_avg:363.51ms step:850/6000 train_loss:3.7581 train_time:305245ms step_avg:363.39ms step:851/6000 train_loss:3.7109 train_time:306570ms step_avg:364.53ms step:852/6000 train_loss:4.0151 train_time:306823ms step_avg:364.40ms step:853/6000 train_loss:3.7156 train_time:307078ms step_avg:364.27ms step:854/6000 train_loss:3.8257 train_time:307333ms step_avg:364.14ms step:855/6000 train_loss:3.9104 train_time:307588ms step_avg:364.01ms step:856/6000 train_loss:3.7906 train_time:307843ms step_avg:363.88ms step:857/6000 train_loss:3.8107 train_time:308097ms step_avg:363.75ms step:858/6000 train_loss:3.8551 train_time:308352ms step_avg:363.62ms step:859/6000 train_loss:3.7616 train_time:308607ms step_avg:363.49ms step:860/6000 train_loss:3.8331 train_time:308862ms step_avg:363.37ms step:861/6000 train_loss:3.8588 train_time:310188ms step_avg:364.50ms step:862/6000 train_loss:3.9065 train_time:310439ms step_avg:364.36ms step:863/6000 train_loss:3.8469 train_time:310693ms step_avg:364.24ms step:864/6000 train_loss:3.8317 train_time:310949ms step_avg:364.11ms step:865/6000 train_loss:3.6659 train_time:311203ms step_avg:363.98ms step:866/6000 train_loss:3.8537 train_time:311458ms step_avg:363.85ms step:867/6000 train_loss:4.1229 train_time:311713ms step_avg:363.73ms step:868/6000 train_loss:3.7037 train_time:311969ms step_avg:363.60ms step:869/6000 
train_loss:3.8963 train_time:312224ms step_avg:363.47ms step:870/6000 train_loss:3.8789 train_time:312478ms step_avg:363.35ms step:871/6000 train_loss:3.7127 train_time:313806ms step_avg:364.47ms step:872/6000 train_loss:3.6805 train_time:314057ms step_avg:364.33ms step:873/6000 train_loss:3.9329 train_time:314312ms step_avg:364.21ms step:874/6000 train_loss:3.7104 train_time:314567ms step_avg:364.08ms step:875/6000 train_loss:3.4208 train_time:314822ms step_avg:363.96ms step:875/6000 val_loss:3.7888 train_time:314856ms step_avg:364.00ms step:876/6000 train_loss:3.9031 train_time:315079ms step_avg:363.83ms step:877/6000 train_loss:3.7157 train_time:315334ms step_avg:363.71ms step:878/6000 train_loss:3.8895 train_time:315589ms step_avg:363.58ms step:879/6000 train_loss:3.7502 train_time:315844ms step_avg:363.46ms step:880/6000 train_loss:3.9267 train_time:316100ms step_avg:363.33ms step:881/6000 train_loss:3.5862 train_time:317428ms step_avg:364.44ms step:882/6000 train_loss:3.7683 train_time:317680ms step_avg:364.31ms step:883/6000 train_loss:3.9517 train_time:317935ms step_avg:364.19ms step:884/6000 train_loss:4.1075 train_time:318190ms step_avg:364.06ms step:885/6000 train_loss:3.8364 train_time:318447ms step_avg:363.94ms step:886/6000 train_loss:3.7420 train_time:318704ms step_avg:363.82ms step:887/6000 train_loss:3.8364 train_time:318960ms step_avg:363.69ms step:888/6000 train_loss:4.3380 train_time:319215ms step_avg:363.57ms step:889/6000 train_loss:4.1079 train_time:319471ms step_avg:363.45ms step:890/6000 train_loss:3.7821 train_time:319725ms step_avg:363.32ms step:891/6000 train_loss:3.7974 train_time:321052ms step_avg:364.42ms step:892/6000 train_loss:3.6223 train_time:321305ms step_avg:364.29ms step:893/6000 train_loss:3.9609 train_time:321560ms step_avg:364.17ms step:894/6000 train_loss:3.6899 train_time:321814ms step_avg:364.04ms step:895/6000 train_loss:3.9539 train_time:322072ms step_avg:363.92ms step:896/6000 train_loss:3.9576 train_time:322326ms 
step_avg:363.80ms step:897/6000 train_loss:3.7547 train_time:322581ms step_avg:363.68ms step:898/6000 train_loss:3.7918 train_time:322836ms step_avg:363.55ms step:899/6000 train_loss:3.8438 train_time:323090ms step_avg:363.43ms step:900/6000 train_loss:3.7372 train_time:323345ms step_avg:363.31ms step:901/6000 train_loss:3.6789 train_time:324673ms step_avg:364.39ms step:902/6000 train_loss:3.8908 train_time:324924ms step_avg:364.26ms step:903/6000 train_loss:3.9001 train_time:325179ms step_avg:364.14ms step:904/6000 train_loss:3.7907 train_time:325434ms step_avg:364.02ms step:905/6000 train_loss:3.7643 train_time:325689ms step_avg:363.90ms step:906/6000 train_loss:3.7542 train_time:325943ms step_avg:363.78ms step:907/6000 train_loss:3.9856 train_time:326199ms step_avg:363.66ms step:908/6000 train_loss:3.7730 train_time:326454ms step_avg:363.53ms step:909/6000 train_loss:3.8110 train_time:326708ms step_avg:363.41ms step:910/6000 train_loss:3.7114 train_time:326964ms step_avg:363.29ms step:911/6000 train_loss:3.8191 train_time:328290ms step_avg:364.36ms step:912/6000 train_loss:3.8782 train_time:328543ms step_avg:364.24ms step:913/6000 train_loss:3.8711 train_time:328798ms step_avg:364.12ms step:914/6000 train_loss:3.7442 train_time:329052ms step_avg:364.00ms step:915/6000 train_loss:3.9945 train_time:329307ms step_avg:363.87ms step:916/6000 train_loss:3.7901 train_time:329562ms step_avg:363.76ms step:917/6000 train_loss:3.8904 train_time:329816ms step_avg:363.63ms step:918/6000 train_loss:3.8561 train_time:330071ms step_avg:363.51ms step:919/6000 train_loss:5.0667 train_time:330326ms step_avg:363.39ms step:920/6000 train_loss:3.7830 train_time:330582ms step_avg:363.28ms step:921/6000 train_loss:3.8303 train_time:331907ms step_avg:364.33ms step:922/6000 train_loss:3.7915 train_time:332161ms step_avg:364.21ms step:923/6000 train_loss:3.8503 train_time:332416ms step_avg:364.09ms step:924/6000 train_loss:3.8537 train_time:332670ms step_avg:363.97ms step:925/6000 
train_loss:3.9396 train_time:332925ms step_avg:363.85ms step:926/6000 train_loss:3.9188 train_time:333181ms step_avg:363.73ms step:927/6000 train_loss:3.8128 train_time:333435ms step_avg:363.61ms step:928/6000 train_loss:3.7993 train_time:333689ms step_avg:363.50ms step:929/6000 train_loss:4.0198 train_time:333945ms step_avg:363.38ms step:930/6000 train_loss:3.8722 train_time:334200ms step_avg:363.26ms step:931/6000 train_loss:3.6507 train_time:335526ms step_avg:364.31ms step:932/6000 train_loss:3.7481 train_time:335779ms step_avg:364.19ms step:933/6000 train_loss:3.9315 train_time:336033ms step_avg:364.07ms step:934/6000 train_loss:3.6589 train_time:336287ms step_avg:363.95ms step:935/6000 train_loss:3.8251 train_time:336541ms step_avg:363.83ms step:936/6000 train_loss:3.7089 train_time:336797ms step_avg:363.71ms step:937/6000 train_loss:3.7641 train_time:337051ms step_avg:363.59ms step:938/6000 train_loss:3.8667 train_time:337305ms step_avg:363.47ms step:939/6000 train_loss:3.7901 train_time:337560ms step_avg:363.36ms step:940/6000 train_loss:3.9562 train_time:337815ms step_avg:363.24ms step:941/6000 train_loss:3.7457 train_time:339141ms step_avg:364.28ms step:942/6000 train_loss:3.8055 train_time:339394ms step_avg:364.16ms step:943/6000 train_loss:3.6064 train_time:339648ms step_avg:364.04ms step:944/6000 train_loss:3.9516 train_time:339902ms step_avg:363.92ms step:945/6000 train_loss:3.6692 train_time:340298ms step_avg:363.96ms step:946/6000 train_loss:3.6821 train_time:340552ms step_avg:363.84ms step:947/6000 train_loss:5.2862 train_time:340806ms step_avg:363.72ms step:948/6000 train_loss:3.8458 train_time:341060ms step_avg:363.60ms step:949/6000 train_loss:3.7480 train_time:341314ms step_avg:363.49ms step:950/6000 train_loss:3.6570 train_time:341721ms step_avg:363.53ms step:951/6000 train_loss:3.7097 train_time:343047ms step_avg:364.56ms step:952/6000 train_loss:3.6663 train_time:343298ms step_avg:364.44ms step:953/6000 train_loss:3.7321 train_time:343555ms 
step_avg:364.32ms step:954/6000 train_loss:3.8131 train_time:343809ms step_avg:364.20ms step:955/6000 train_loss:3.6834 train_time:344064ms step_avg:364.09ms step:956/6000 train_loss:3.7354 train_time:344317ms step_avg:363.97ms step:957/6000 train_loss:3.6921 train_time:344571ms step_avg:363.86ms step:958/6000 train_loss:3.7613 train_time:344826ms step_avg:363.74ms step:959/6000 train_loss:3.7524 train_time:345081ms step_avg:363.63ms step:960/6000 train_loss:3.7584 train_time:345335ms step_avg:363.51ms step:961/6000 train_loss:3.6417 train_time:346662ms step_avg:364.52ms step:962/6000 train_loss:3.9104 train_time:346912ms step_avg:364.40ms step:963/6000 train_loss:3.8609 train_time:347168ms step_avg:364.29ms step:964/6000 train_loss:4.0924 train_time:347422ms step_avg:364.17ms step:965/6000 train_loss:3.7148 train_time:347676ms step_avg:364.06ms step:966/6000 train_loss:3.7551 train_time:347931ms step_avg:363.94ms step:967/6000 train_loss:3.9735 train_time:348186ms step_avg:363.83ms step:968/6000 train_loss:3.7975 train_time:348442ms step_avg:363.72ms step:969/6000 train_loss:3.7923 train_time:348698ms step_avg:363.61ms step:970/6000 train_loss:3.8399 train_time:348952ms step_avg:363.49ms step:971/6000 train_loss:3.6530 train_time:350279ms step_avg:364.49ms step:972/6000 train_loss:3.8076 train_time:350530ms step_avg:364.38ms step:973/6000 train_loss:3.7629 train_time:350785ms step_avg:364.26ms step:974/6000 train_loss:3.8012 train_time:351039ms step_avg:364.15ms step:975/6000 train_loss:3.8738 train_time:351294ms step_avg:364.04ms step:976/6000 train_loss:3.7466 train_time:351548ms step_avg:363.92ms step:977/6000 train_loss:3.9443 train_time:351803ms step_avg:363.81ms step:978/6000 train_loss:3.8274 train_time:352058ms step_avg:363.70ms step:979/6000 train_loss:3.6577 train_time:352312ms step_avg:363.58ms step:980/6000 train_loss:3.9585 train_time:352567ms step_avg:363.47ms step:981/6000 train_loss:3.6737 train_time:353894ms step_avg:364.46ms step:982/6000 
train_loss:3.8359 train_time:354146ms step_avg:364.35ms step:983/6000 train_loss:3.8280 train_time:354400ms step_avg:364.23ms step:984/6000 train_loss:3.8308 train_time:354654ms step_avg:364.12ms step:985/6000 train_loss:3.7681 train_time:354909ms step_avg:364.01ms step:986/6000 train_loss:3.8509 train_time:355164ms step_avg:363.90ms step:987/6000 train_loss:3.6690 train_time:355419ms step_avg:363.79ms step:988/6000 train_loss:3.7411 train_time:355675ms step_avg:363.68ms step:989/6000 train_loss:3.7337 train_time:355929ms step_avg:363.56ms step:990/6000 train_loss:3.6873 train_time:356184ms step_avg:363.45ms step:991/6000 train_loss:3.8906 train_time:357511ms step_avg:364.44ms step:992/6000 train_loss:3.7245 train_time:357762ms step_avg:364.32ms step:993/6000 train_loss:3.6948 train_time:358016ms step_avg:364.21ms step:994/6000 train_loss:3.7727 train_time:358272ms step_avg:364.10ms step:995/6000 train_loss:3.8499 train_time:358526ms step_avg:363.99ms step:996/6000 train_loss:3.8025 train_time:358781ms step_avg:363.88ms step:997/6000 train_loss:3.7059 train_time:359036ms step_avg:363.76ms step:998/6000 train_loss:4.0656 train_time:359290ms step_avg:363.65ms step:999/6000 train_loss:3.7155 train_time:359546ms step_avg:363.54ms step:1000/6000 train_loss:3.8389 train_time:359801ms step_avg:363.44ms step:1000/6000 val_loss:3.7416 train_time:359836ms step_avg:363.47ms step:1001/6000 train_loss:3.7117 train_time:361130ms step_avg:364.41ms step:1002/6000 train_loss:3.7665 train_time:361382ms step_avg:364.30ms step:1003/6000 train_loss:3.6475 train_time:361638ms step_avg:364.19ms step:1004/6000 train_loss:3.8339 train_time:361892ms step_avg:364.08ms step:1005/6000 train_loss:3.8837 train_time:362146ms step_avg:363.97ms step:1006/6000 train_loss:3.6535 train_time:362403ms step_avg:363.86ms step:1007/6000 train_loss:3.7365 train_time:362656ms step_avg:363.75ms step:1008/6000 train_loss:3.7043 train_time:362912ms step_avg:363.64ms step:1009/6000 train_loss:3.8249 
train_time:363167ms step_avg:363.53ms step:1010/6000 train_loss:3.9247 train_time:363422ms step_avg:363.42ms step:1011/6000 train_loss:3.8192 train_time:364750ms step_avg:364.39ms step:1012/6000 train_loss:3.7879 train_time:365002ms step_avg:364.27ms step:1013/6000 train_loss:3.6522 train_time:365258ms step_avg:364.17ms step:1014/6000 train_loss:3.7863 train_time:365513ms step_avg:364.06ms step:1015/6000 train_loss:3.9072 train_time:365767ms step_avg:363.95ms step:1016/6000 train_loss:3.6092 train_time:366024ms step_avg:363.84ms step:1017/6000 train_loss:3.7060 train_time:366278ms step_avg:363.73ms step:1018/6000 train_loss:3.7157 train_time:366533ms step_avg:363.62ms step:1019/6000 train_loss:3.6501 train_time:366787ms step_avg:363.52ms step:1020/6000 train_loss:3.7921 train_time:367044ms step_avg:363.41ms step:1021/6000 train_loss:3.7070 train_time:368370ms step_avg:364.36ms step:1022/6000 train_loss:3.6369 train_time:368624ms step_avg:364.25ms step:1023/6000 train_loss:3.7400 train_time:368878ms step_avg:364.14ms step:1024/6000 train_loss:3.7720 train_time:369132ms step_avg:364.04ms step:1025/6000 train_loss:3.7438 train_time:369386ms step_avg:363.93ms step:1026/6000 train_loss:3.7632 train_time:369642ms step_avg:363.82ms step:1027/6000 train_loss:3.9283 train_time:369896ms step_avg:363.71ms step:1028/6000 train_loss:3.5996 train_time:370150ms step_avg:363.61ms step:1029/6000 train_loss:3.6577 train_time:370406ms step_avg:363.50ms step:1030/6000 train_loss:3.6192 train_time:370661ms step_avg:363.39ms step:1031/6000 train_loss:3.7802 train_time:371987ms step_avg:364.34ms step:1032/6000 train_loss:3.7719 train_time:372239ms step_avg:364.23ms step:1033/6000 train_loss:3.9499 train_time:372493ms step_avg:364.12ms step:1034/6000 train_loss:3.7586 train_time:372747ms step_avg:364.01ms step:1035/6000 train_loss:3.6841 train_time:373002ms step_avg:363.90ms step:1036/6000 train_loss:3.6979 train_time:373257ms step_avg:363.80ms step:1037/6000 train_loss:3.7696 
train_time:373512ms step_avg:363.69ms step:1038/6000 train_loss:4.0773 train_time:373767ms step_avg:363.59ms step:1039/6000 train_loss:3.8912 train_time:374022ms step_avg:363.48ms step:1040/6000 train_loss:3.7939 train_time:374276ms step_avg:363.37ms step:1041/6000 train_loss:3.6857 train_time:375603ms step_avg:364.31ms step:1042/6000 train_loss:3.7526 train_time:375854ms step_avg:364.20ms step:1043/6000 train_loss:3.7957 train_time:376109ms step_avg:364.09ms step:1044/6000 train_loss:3.7167 train_time:376363ms step_avg:363.99ms step:1045/6000 train_loss:3.7182 train_time:376618ms step_avg:363.88ms step:1046/6000 train_loss:3.8087 train_time:376873ms step_avg:363.78ms step:1047/6000 train_loss:3.7105 train_time:377127ms step_avg:363.67ms step:1048/6000 train_loss:3.9136 train_time:377382ms step_avg:363.57ms step:1049/6000 train_loss:3.7663 train_time:377638ms step_avg:363.46ms step:1050/6000 train_loss:3.6974 train_time:377893ms step_avg:363.36ms step:1051/6000 train_loss:3.6608 train_time:379219ms step_avg:364.28ms step:1052/6000 train_loss:3.7821 train_time:379470ms step_avg:364.18ms step:1053/6000 train_loss:3.6643 train_time:379725ms step_avg:364.07ms step:1054/6000 train_loss:3.9707 train_time:379979ms step_avg:363.96ms step:1055/6000 train_loss:3.8173 train_time:380233ms step_avg:363.86ms step:1056/6000 train_loss:3.6691 train_time:380488ms step_avg:363.76ms step:1057/6000 train_loss:3.7724 train_time:380745ms step_avg:363.65ms step:1058/6000 train_loss:3.8497 train_time:380999ms step_avg:363.55ms step:1059/6000 train_loss:3.5741 train_time:381254ms step_avg:363.45ms step:1060/6000 train_loss:3.7024 train_time:381509ms step_avg:363.34ms step:1061/6000 train_loss:3.7268 train_time:382834ms step_avg:364.26ms step:1062/6000 train_loss:3.6927 train_time:383087ms step_avg:364.15ms step:1063/6000 train_loss:3.6554 train_time:383342ms step_avg:364.05ms step:1064/6000 train_loss:3.7683 train_time:383596ms step_avg:363.94ms step:1065/6000 train_loss:3.6628 
train_time:383850ms step_avg:363.84ms step:1066/6000 train_loss:3.6464 train_time:384105ms step_avg:363.74ms step:1067/6000 train_loss:3.6679 train_time:384361ms step_avg:363.63ms step:1068/6000 train_loss:3.5869 train_time:384616ms step_avg:363.53ms step:1069/6000 train_loss:3.6968 train_time:384869ms step_avg:363.43ms step:1070/6000 train_loss:3.5639 train_time:385124ms step_avg:363.32ms step:1071/6000 train_loss:3.8255 train_time:386451ms step_avg:364.23ms step:1072/6000 train_loss:3.7795 train_time:386705ms step_avg:364.13ms step:1073/6000 train_loss:3.7211 train_time:386960ms step_avg:364.03ms step:1074/6000 train_loss:3.7952 train_time:387214ms step_avg:363.92ms step:1075/6000 train_loss:3.7430 train_time:387470ms step_avg:363.82ms step:1076/6000 train_loss:3.6713 train_time:387724ms step_avg:363.72ms step:1077/6000 train_loss:4.0640 train_time:387979ms step_avg:363.62ms step:1078/6000 train_loss:3.7571 train_time:388233ms step_avg:363.51ms step:1079/6000 train_loss:3.4142 train_time:388487ms step_avg:363.41ms step:1080/6000 train_loss:3.8034 train_time:388743ms step_avg:363.31ms step:1081/6000 train_loss:3.7376 train_time:390069ms step_avg:364.21ms step:1082/6000 train_loss:3.7929 train_time:390323ms step_avg:364.11ms step:1083/6000 train_loss:3.8904 train_time:390576ms step_avg:364.00ms step:1084/6000 train_loss:3.7854 train_time:390831ms step_avg:363.90ms step:1085/6000 train_loss:3.7639 train_time:391085ms step_avg:363.80ms step:1086/6000 train_loss:3.7174 train_time:391339ms step_avg:363.70ms step:1087/6000 train_loss:3.9178 train_time:391594ms step_avg:363.60ms step:1088/6000 train_loss:3.8191 train_time:391849ms step_avg:363.50ms step:1089/6000 train_loss:3.6346 train_time:392104ms step_avg:363.40ms step:1090/6000 train_loss:3.6653 train_time:392359ms step_avg:363.30ms step:1091/6000 train_loss:3.7853 train_time:393684ms step_avg:364.18ms step:1092/6000 train_loss:3.5815 train_time:393937ms step_avg:364.08ms step:1093/6000 train_loss:3.7730 
train_time:394191ms step_avg:363.98ms step:1094/6000 train_loss:3.9051 train_time:394446ms step_avg:363.88ms step:1095/6000 train_loss:3.7462 train_time:394702ms step_avg:363.78ms step:1096/6000 train_loss:3.6989 train_time:394956ms step_avg:363.68ms step:1097/6000 train_loss:3.7185 train_time:395212ms step_avg:363.58ms step:1098/6000 train_loss:3.7665 train_time:395466ms step_avg:363.48ms step:1099/6000 train_loss:3.8428 train_time:395721ms step_avg:363.38ms step:1100/6000 train_loss:3.7881 train_time:395975ms step_avg:363.28ms step:1101/6000 train_loss:3.7390 train_time:397302ms step_avg:364.16ms step:1102/6000 train_loss:3.5881 train_time:397553ms step_avg:364.06ms step:1103/6000 train_loss:3.6476 train_time:397807ms step_avg:363.96ms step:1104/6000 train_loss:3.7341 train_time:398061ms step_avg:363.86ms step:1105/6000 train_loss:3.6104 train_time:398317ms step_avg:363.76ms step:1106/6000 train_loss:4.3535 train_time:398571ms step_avg:363.66ms step:1107/6000 train_loss:3.5152 train_time:398826ms step_avg:363.56ms step:1108/6000 train_loss:3.8552 train_time:399082ms step_avg:363.46ms step:1109/6000 train_loss:3.6368 train_time:399337ms step_avg:363.36ms step:1110/6000 train_loss:3.7851 train_time:399591ms step_avg:363.26ms step:1111/6000 train_loss:3.7202 train_time:400919ms step_avg:364.14ms step:1112/6000 train_loss:3.7678 train_time:401170ms step_avg:364.04ms step:1113/6000 train_loss:3.8574 train_time:401424ms step_avg:363.94ms step:1114/6000 train_loss:3.7158 train_time:401679ms step_avg:363.84ms step:1115/6000 train_loss:3.6469 train_time:401934ms step_avg:363.74ms step:1116/6000 train_loss:3.5697 train_time:402189ms step_avg:363.64ms step:1117/6000 train_loss:3.7270 train_time:402444ms step_avg:363.55ms step:1118/6000 train_loss:3.8735 train_time:402698ms step_avg:363.45ms step:1119/6000 train_loss:3.9117 train_time:402953ms step_avg:363.35ms step:1120/6000 train_loss:3.7527 train_time:403208ms step_avg:363.25ms step:1121/6000 train_loss:3.7865 
train_time:404534ms step_avg:364.12ms step:1122/6000 train_loss:3.6869 train_time:404786ms step_avg:364.02ms step:1123/6000 train_loss:3.7404 train_time:405040ms step_avg:363.92ms step:1124/6000 train_loss:3.8819 train_time:405295ms step_avg:363.82ms step:1125/6000 train_loss:3.6492 train_time:405549ms step_avg:363.72ms step:1125/6000 val_loss:3.7110 train_time:405584ms step_avg:363.75ms step:1126/6000 train_loss:3.5395 train_time:405808ms step_avg:363.63ms step:1127/6000 train_loss:3.7714 train_time:406063ms step_avg:363.53ms step:1128/6000 train_loss:3.9882 train_time:406318ms step_avg:363.43ms step:1129/6000 train_loss:3.5292 train_time:406572ms step_avg:363.34ms step:1130/6000 train_loss:3.8448 train_time:406829ms step_avg:363.24ms step:1131/6000 train_loss:3.6846 train_time:408156ms step_avg:364.10ms step:1132/6000 train_loss:3.7076 train_time:408409ms step_avg:364.00ms step:1133/6000 train_loss:3.6655 train_time:408664ms step_avg:363.90ms step:1134/6000 train_loss:3.8264 train_time:409057ms step_avg:363.93ms step:1135/6000 train_loss:3.7583 train_time:409311ms step_avg:363.83ms step:1136/6000 train_loss:3.8012 train_time:409567ms step_avg:363.74ms step:1137/6000 train_loss:3.8349 train_time:409823ms step_avg:363.64ms step:1138/6000 train_loss:3.7527 train_time:410078ms step_avg:363.54ms step:1139/6000 train_loss:3.6550 train_time:410332ms step_avg:363.45ms step:1140/6000 train_loss:3.9607 train_time:410747ms step_avg:363.49ms step:1141/6000 train_loss:3.7660 train_time:412073ms step_avg:364.34ms step:1142/6000 train_loss:3.8565 train_time:412326ms step_avg:364.25ms step:1143/6000 train_loss:3.7477 train_time:412583ms step_avg:364.15ms step:1144/6000 train_loss:3.6663 train_time:412838ms step_avg:364.05ms step:1145/6000 train_loss:3.7650 train_time:413092ms step_avg:363.96ms step:1146/6000 train_loss:3.8884 train_time:413347ms step_avg:363.86ms step:1147/6000 train_loss:3.8444 train_time:413602ms step_avg:363.77ms step:1148/6000 train_loss:3.7633 
train_time:413857ms step_avg:363.67ms step:1149/6000 train_loss:3.7897 train_time:414113ms step_avg:363.58ms step:1150/6000 train_loss:3.6520 train_time:414368ms step_avg:363.48ms step:1151/6000 train_loss:3.6638 train_time:415694ms step_avg:364.32ms step:1152/6000 train_loss:3.6270 train_time:415949ms step_avg:364.23ms step:1153/6000 train_loss:3.7819 train_time:416202ms step_avg:364.13ms step:1154/6000 train_loss:3.7438 train_time:416457ms step_avg:364.04ms step:1155/6000 train_loss:3.8088 train_time:416711ms step_avg:363.94ms step:1156/6000 train_loss:3.6624 train_time:416966ms step_avg:363.84ms step:1157/6000 train_loss:3.8318 train_time:417220ms step_avg:363.75ms step:1158/6000 train_loss:3.7828 train_time:417475ms step_avg:363.65ms step:1159/6000 train_loss:3.6130 train_time:417730ms step_avg:363.56ms step:1160/6000 train_loss:3.6301 train_time:417984ms step_avg:363.46ms step:1161/6000 train_loss:3.6164 train_time:419311ms step_avg:364.30ms step:1162/6000 train_loss:3.4534 train_time:419564ms step_avg:364.20ms step:1163/6000 train_loss:3.7457 train_time:419819ms step_avg:364.11ms step:1164/6000 train_loss:3.7063 train_time:420073ms step_avg:364.01ms step:1165/6000 train_loss:3.5732 train_time:420328ms step_avg:363.92ms step:1166/6000 train_loss:3.5776 train_time:420584ms step_avg:363.83ms step:1167/6000 train_loss:3.6730 train_time:420839ms step_avg:363.73ms step:1168/6000 train_loss:3.6785 train_time:421094ms step_avg:363.64ms step:1169/6000 train_loss:4.0009 train_time:421349ms step_avg:363.55ms step:1170/6000 train_loss:3.6895 train_time:421604ms step_avg:363.45ms step:1171/6000 train_loss:3.7024 train_time:422929ms step_avg:364.28ms step:1172/6000 train_loss:3.6049 train_time:423183ms step_avg:364.19ms step:1173/6000 train_loss:3.7036 train_time:423437ms step_avg:364.09ms step:1174/6000 train_loss:3.8410 train_time:423692ms step_avg:364.00ms step:1175/6000 train_loss:3.6777 train_time:423946ms step_avg:363.90ms step:1176/6000 train_loss:3.7025 
train_time:424201ms step_avg:363.81ms step:1177/6000 train_loss:3.7522 train_time:424456ms step_avg:363.72ms step:1178/6000 train_loss:3.7335 train_time:424710ms step_avg:363.62ms step:1179/6000 train_loss:3.7897 train_time:424966ms step_avg:363.53ms step:1180/6000 train_loss:3.7010 train_time:425221ms step_avg:363.44ms step:1181/6000 train_loss:3.7029 train_time:426547ms step_avg:364.26ms step:1182/6000 train_loss:3.6458 train_time:426797ms step_avg:364.16ms step:1183/6000 train_loss:3.7189 train_time:427052ms step_avg:364.07ms step:1184/6000 train_loss:3.6351 train_time:427306ms step_avg:363.97ms step:1185/6000 train_loss:3.7958 train_time:427561ms step_avg:363.88ms step:1186/6000 train_loss:3.8613 train_time:427815ms step_avg:363.79ms step:1187/6000 train_loss:3.6549 train_time:428070ms step_avg:363.70ms step:1188/6000 train_loss:3.7125 train_time:428325ms step_avg:363.60ms step:1189/6000 train_loss:3.7383 train_time:428580ms step_avg:363.51ms step:1190/6000 train_loss:3.5819 train_time:428834ms step_avg:363.42ms step:1191/6000 train_loss:3.7547 train_time:430161ms step_avg:364.23ms step:1192/6000 train_loss:3.8927 train_time:430412ms step_avg:364.14ms step:1193/6000 train_loss:3.6888 train_time:430666ms step_avg:364.05ms step:1194/6000 train_loss:3.5861 train_time:430921ms step_avg:363.95ms step:1195/6000 train_loss:3.8708 train_time:431176ms step_avg:363.86ms step:1196/6000 train_loss:3.6814 train_time:431431ms step_avg:363.77ms step:1197/6000 train_loss:3.6822 train_time:431685ms step_avg:363.68ms step:1198/6000 train_loss:3.5829 train_time:431940ms step_avg:363.59ms step:1199/6000 train_loss:3.5911 train_time:432196ms step_avg:363.49ms step:1200/6000 train_loss:3.6524 train_time:432450ms step_avg:363.40ms step:1201/6000 train_loss:3.7347 train_time:433776ms step_avg:364.21ms step:1202/6000 train_loss:3.8068 train_time:434028ms step_avg:364.12ms step:1203/6000 train_loss:3.9099 train_time:434282ms step_avg:364.03ms step:1204/6000 train_loss:3.7172 
train_time:434536ms step_avg:363.93ms step:1205/6000 train_loss:3.6413 train_time:434789ms step_avg:363.84ms step:1206/6000 train_loss:3.7166 train_time:435044ms step_avg:363.75ms step:1207/6000 train_loss:3.7735 train_time:435299ms step_avg:363.66ms step:1208/6000 train_loss:3.8252 train_time:435553ms step_avg:363.57ms step:1209/6000 train_loss:3.7090 train_time:435810ms step_avg:363.48ms step:1210/6000 train_loss:3.5598 train_time:436065ms step_avg:363.39ms step:1211/6000 train_loss:3.6134 train_time:437390ms step_avg:364.19ms step:1212/6000 train_loss:3.7079 train_time:437642ms step_avg:364.09ms step:1213/6000 train_loss:3.7234 train_time:437896ms step_avg:364.00ms step:1214/6000 train_loss:3.7587 train_time:438153ms step_avg:363.91ms step:1215/6000 train_loss:3.6567 train_time:438407ms step_avg:363.82ms step:1216/6000 train_loss:3.7009 train_time:438664ms step_avg:363.73ms step:1217/6000 train_loss:3.6528 train_time:438918ms step_avg:363.64ms step:1218/6000 train_loss:3.6447 train_time:439173ms step_avg:363.55ms step:1219/6000 train_loss:3.7380 train_time:439427ms step_avg:363.46ms step:1220/6000 train_loss:3.5838 train_time:439682ms step_avg:363.37ms step:1221/6000 train_loss:3.8000 train_time:441008ms step_avg:364.17ms step:1222/6000 train_loss:3.8310 train_time:441261ms step_avg:364.08ms step:1223/6000 train_loss:3.7670 train_time:441515ms step_avg:363.99ms step:1224/6000 train_loss:3.5997 train_time:441769ms step_avg:363.90ms step:1225/6000 train_loss:3.6067 train_time:442024ms step_avg:363.81ms step:1226/6000 train_loss:3.6747 train_time:442278ms step_avg:363.72ms step:1227/6000 train_loss:3.6595 train_time:442533ms step_avg:363.63ms step:1228/6000 train_loss:3.5935 train_time:442787ms step_avg:363.54ms step:1229/6000 train_loss:3.7678 train_time:443043ms step_avg:363.45ms step:1230/6000 train_loss:3.6855 train_time:443298ms step_avg:363.36ms step:1231/6000 train_loss:3.7486 train_time:444624ms step_avg:364.15ms step:1232/6000 train_loss:3.9062 
train_time:444876ms step_avg:364.06ms step:1233/6000 train_loss:3.7980 train_time:445130ms step_avg:363.97ms step:1234/6000 train_loss:3.7357 train_time:445385ms step_avg:363.88ms step:1235/6000 train_loss:3.8842 train_time:445640ms step_avg:363.79ms step:1236/6000 train_loss:3.6500 train_time:445894ms step_avg:363.70ms step:1237/6000 train_loss:3.6128 train_time:446149ms step_avg:363.61ms step:1238/6000 train_loss:3.5628 train_time:446403ms step_avg:363.52ms step:1239/6000 train_loss:3.6507 train_time:446659ms step_avg:363.43ms step:1240/6000 train_loss:3.6432 train_time:446913ms step_avg:363.34ms step:1241/6000 train_loss:3.6971 train_time:448240ms step_avg:364.13ms step:1242/6000 train_loss:3.7464 train_time:448492ms step_avg:364.04ms step:1243/6000 train_loss:3.6177 train_time:448747ms step_avg:363.95ms step:1244/6000 train_loss:3.7204 train_time:449003ms step_avg:363.86ms step:1245/6000 train_loss:3.7336 train_time:449259ms step_avg:363.77ms step:1246/6000 train_loss:3.7297 train_time:449513ms step_avg:363.68ms step:1247/6000 train_loss:3.5528 train_time:449768ms step_avg:363.60ms step:1248/6000 train_loss:3.6996 train_time:450023ms step_avg:363.51ms step:1249/6000 train_loss:3.7620 train_time:450279ms step_avg:363.42ms step:1250/6000 train_loss:3.7297 train_time:450536ms step_avg:363.34ms step:1250/6000 val_loss:3.6793 train_time:450570ms step_avg:363.36ms step:1251/6000 train_loss:3.6318 train_time:451864ms step_avg:364.11ms step:1252/6000 train_loss:3.8365 train_time:452117ms step_avg:364.02ms step:1253/6000 train_loss:3.6940 train_time:452371ms step_avg:363.93ms step:1254/6000 train_loss:3.6277 train_time:452625ms step_avg:363.85ms step:1255/6000 train_loss:3.7594 train_time:452881ms step_avg:363.76ms step:1256/6000 train_loss:3.8243 train_time:453135ms step_avg:363.67ms step:1257/6000 train_loss:3.6372 train_time:453390ms step_avg:363.58ms step:1258/6000 train_loss:3.6582 train_time:453644ms step_avg:363.50ms step:1259/6000 train_loss:3.6849 
train_time:453900ms step_avg:363.41ms step:1260/6000 train_loss:3.6655 train_time:454155ms step_avg:363.32ms step:1261/6000 train_loss:3.5212 train_time:455481ms step_avg:364.09ms step:1262/6000 train_loss:3.6167 train_time:455732ms step_avg:364.00ms step:1263/6000 train_loss:3.6876 train_time:455987ms step_avg:363.92ms step:1264/6000 train_loss:3.5298 train_time:456241ms step_avg:363.83ms step:1265/6000 train_loss:3.7572 train_time:456496ms step_avg:363.74ms step:1266/6000 train_loss:3.7414 train_time:456750ms step_avg:363.65ms step:1267/6000 train_loss:3.7426 train_time:457004ms step_avg:363.57ms step:1268/6000 train_loss:3.6880 train_time:457259ms step_avg:363.48ms step:1269/6000 train_loss:3.7185 train_time:457515ms step_avg:363.40ms step:1270/6000 train_loss:3.5833 train_time:457769ms step_avg:363.31ms step:1271/6000 train_loss:3.4259 train_time:459096ms step_avg:364.07ms step:1272/6000 train_loss:3.7102 train_time:459346ms step_avg:363.98ms step:1273/6000 train_loss:3.6676 train_time:459601ms step_avg:363.90ms step:1274/6000 train_loss:3.7323 train_time:459856ms step_avg:363.81ms step:1275/6000 train_loss:3.6694 train_time:460110ms step_avg:363.72ms step:1276/6000 train_loss:3.7621 train_time:460365ms step_avg:363.64ms step:1277/6000 train_loss:3.7888 train_time:460621ms step_avg:363.55ms step:1278/6000 train_loss:3.7428 train_time:460876ms step_avg:363.47ms step:1279/6000 train_loss:3.7367 train_time:461129ms step_avg:363.38ms step:1280/6000 train_loss:3.5646 train_time:461385ms step_avg:363.30ms step:1281/6000 train_loss:3.6860 train_time:462711ms step_avg:364.05ms step:1282/6000 train_loss:3.7506 train_time:462963ms step_avg:363.96ms step:1283/6000 train_loss:3.7826 train_time:463218ms step_avg:363.88ms step:1284/6000 train_loss:3.6716 train_time:463473ms step_avg:363.79ms step:1285/6000 train_loss:3.7054 train_time:463727ms step_avg:363.71ms step:1286/6000 train_loss:3.6852 train_time:463982ms step_avg:363.62ms step:1287/6000 train_loss:3.6577 
train_time:464237ms step_avg:363.54ms step:1288/6000 train_loss:3.7868 train_time:464493ms step_avg:363.45ms step:1289/6000 train_loss:3.6214 train_time:464749ms step_avg:363.37ms step:1290/6000 train_loss:3.7065 train_time:465005ms step_avg:363.28ms step:1291/6000 train_loss:3.7799 train_time:466330ms step_avg:364.04ms step:1292/6000 train_loss:3.7069 train_time:466594ms step_avg:363.96ms step:1293/6000 train_loss:3.8049 train_time:466848ms step_avg:363.87ms step:1294/6000 train_loss:3.8293 train_time:467103ms step_avg:363.79ms step:1295/6000 train_loss:3.8081 train_time:467357ms step_avg:363.70ms step:1296/6000 train_loss:3.6060 train_time:467612ms step_avg:363.62ms step:1297/6000 train_loss:3.6766 train_time:467867ms step_avg:363.53ms step:1298/6000 train_loss:3.5800 train_time:468122ms step_avg:363.45ms step:1299/6000 train_loss:3.6569 train_time:468376ms step_avg:363.36ms step:1300/6000 train_loss:3.7140 train_time:468631ms step_avg:363.28ms step:1301/6000 train_loss:3.7324 train_time:469958ms step_avg:364.03ms step:1302/6000 train_loss:3.7377 train_time:470210ms step_avg:363.94ms step:1303/6000 train_loss:3.8780 train_time:470465ms step_avg:363.86ms step:1304/6000 train_loss:3.6610 train_time:470718ms step_avg:363.77ms step:1305/6000 train_loss:3.8726 train_time:470973ms step_avg:363.69ms step:1306/6000 train_loss:3.5966 train_time:471227ms step_avg:363.60ms step:1307/6000 train_loss:3.7775 train_time:471482ms step_avg:363.52ms step:1308/6000 train_loss:3.7814 train_time:471737ms step_avg:363.43ms step:1309/6000 train_loss:3.6504 train_time:471992ms step_avg:363.35ms step:1310/6000 train_loss:3.6140 train_time:472246ms step_avg:363.27ms step:1311/6000 train_loss:3.6421 train_time:473573ms step_avg:364.01ms step:1312/6000 train_loss:3.6028 train_time:473823ms step_avg:363.92ms step:1313/6000 train_loss:3.7369 train_time:474077ms step_avg:363.83ms step:1314/6000 train_loss:3.6727 train_time:474331ms step_avg:363.75ms step:1315/6000 train_loss:3.3972 
train_time:474586ms step_avg:363.67ms step:1316/6000 train_loss:3.6244 train_time:474840ms step_avg:363.58ms step:1317/6000 train_loss:3.6953 train_time:475095ms step_avg:363.50ms step:1318/6000 train_loss:3.7273 train_time:475349ms step_avg:363.42ms step:1319/6000 train_loss:3.5948 train_time:475605ms step_avg:363.33ms step:1320/6000 train_loss:3.7413 train_time:475859ms step_avg:363.25ms step:1321/6000 train_loss:3.7972 train_time:477187ms step_avg:363.99ms step:1322/6000 train_loss:3.6839 train_time:477437ms step_avg:363.90ms step:1323/6000 train_loss:3.6331 train_time:477837ms step_avg:363.93ms step:1324/6000 train_loss:3.6647 train_time:478091ms step_avg:363.84ms step:1325/6000 train_loss:3.7591 train_time:478345ms step_avg:363.76ms step:1326/6000 train_loss:3.8139 train_time:478599ms step_avg:363.68ms step:1327/6000 train_loss:3.5758 train_time:478854ms step_avg:363.59ms step:1328/6000 train_loss:3.4967 train_time:479108ms step_avg:363.51ms step:1329/6000 train_loss:3.7960 train_time:479363ms step_avg:363.43ms step:1330/6000 train_loss:3.6565 train_time:479773ms step_avg:363.46ms step:1331/6000 train_loss:3.7808 train_time:481100ms step_avg:364.19ms step:1332/6000 train_loss:3.6822 train_time:481350ms step_avg:364.11ms step:1333/6000 train_loss:4.0813 train_time:481605ms step_avg:364.03ms step:1334/6000 train_loss:3.7683 train_time:481860ms step_avg:363.94ms step:1335/6000 train_loss:3.6874 train_time:482115ms step_avg:363.86ms step:1336/6000 train_loss:3.6352 train_time:482369ms step_avg:363.78ms step:1337/6000 train_loss:3.6234 train_time:482623ms step_avg:363.69ms step:1338/6000 train_loss:3.8840 train_time:482880ms step_avg:363.61ms step:1339/6000 train_loss:3.8101 train_time:483134ms step_avg:363.53ms step:1340/6000 train_loss:3.6649 train_time:483389ms step_avg:363.45ms step:1341/6000 train_loss:3.6225 train_time:484715ms step_avg:364.17ms step:1342/6000 train_loss:3.9381 train_time:484966ms step_avg:364.09ms step:1343/6000 train_loss:3.6908 
train_time:485220ms step_avg:364.01ms step:1344/6000 train_loss:3.6906 train_time:485475ms step_avg:363.92ms step:1345/6000 train_loss:3.7465 train_time:485729ms step_avg:363.84ms step:1346/6000 train_loss:3.7137 train_time:485983ms step_avg:363.76ms step:1347/6000 train_loss:3.6147 train_time:486238ms step_avg:363.68ms step:1348/6000 train_loss:3.5562 train_time:486493ms step_avg:363.60ms step:1349/6000 train_loss:3.6639 train_time:486748ms step_avg:363.52ms step:1350/6000 train_loss:3.5944 train_time:487004ms step_avg:363.44ms step:1351/6000 train_loss:3.7327 train_time:488331ms step_avg:364.15ms step:1352/6000 train_loss:3.5731 train_time:488582ms step_avg:364.07ms step:1353/6000 train_loss:3.6360 train_time:488836ms step_avg:363.99ms step:1354/6000 train_loss:3.7447 train_time:489090ms step_avg:363.91ms step:1355/6000 train_loss:3.5774 train_time:489345ms step_avg:363.83ms step:1356/6000 train_loss:3.5086 train_time:489602ms step_avg:363.75ms step:1357/6000 train_loss:3.8550 train_time:489856ms step_avg:363.66ms step:1358/6000 train_loss:3.7899 train_time:490111ms step_avg:363.58ms step:1359/6000 train_loss:3.4988 train_time:490366ms step_avg:363.50ms step:1360/6000 train_loss:3.7846 train_time:490621ms step_avg:363.42ms step:1361/6000 train_loss:3.6765 train_time:491946ms step_avg:364.14ms step:1362/6000 train_loss:3.5517 train_time:492199ms step_avg:364.05ms step:1363/6000 train_loss:3.7069 train_time:492453ms step_avg:363.97ms step:1364/6000 train_loss:3.5958 train_time:492708ms step_avg:363.89ms step:1365/6000 train_loss:3.6238 train_time:492962ms step_avg:363.81ms step:1366/6000 train_loss:3.6462 train_time:493219ms step_avg:363.73ms step:1367/6000 train_loss:3.7523 train_time:493474ms step_avg:363.65ms step:1368/6000 train_loss:3.7276 train_time:493728ms step_avg:363.57ms step:1369/6000 train_loss:3.6879 train_time:493983ms step_avg:363.49ms step:1370/6000 train_loss:3.5853 train_time:494238ms step_avg:363.41ms step:1371/6000 train_loss:3.9156 
train_time:495565ms step_avg:364.12ms step:1372/6000 train_loss:3.6619 train_time:495818ms step_avg:364.04ms step:1373/6000 train_loss:3.6878 train_time:496074ms step_avg:363.96ms step:1374/6000 train_loss:3.6903 train_time:496328ms step_avg:363.88ms step:1375/6000 train_loss:3.4944 train_time:496584ms step_avg:363.80ms step:1375/6000 val_loss:3.6518 train_time:496618ms step_avg:363.82ms step:1376/6000 train_loss:3.8942 train_time:496844ms step_avg:363.72ms step:1377/6000 train_loss:3.6647 train_time:497100ms step_avg:363.64ms step:1378/6000 train_loss:3.8096 train_time:497356ms step_avg:363.56ms step:1379/6000 train_loss:3.8718 train_time:497612ms step_avg:363.49ms step:1380/6000 train_loss:3.5316 train_time:497866ms step_avg:363.41ms step:1381/6000 train_loss:3.6631 train_time:499193ms step_avg:364.11ms step:1382/6000 train_loss:4.1092 train_time:499444ms step_avg:364.03ms step:1383/6000 train_loss:3.5745 train_time:499699ms step_avg:363.95ms step:1384/6000 train_loss:3.7278 train_time:499956ms step_avg:363.87ms step:1385/6000 train_loss:3.8026 train_time:500210ms step_avg:363.79ms step:1386/6000 train_loss:3.7195 train_time:500465ms step_avg:363.71ms step:1387/6000 train_loss:3.7285 train_time:500721ms step_avg:363.63ms step:1388/6000 train_loss:3.5384 train_time:500975ms step_avg:363.55ms step:1389/6000 train_loss:3.6840 train_time:501230ms step_avg:363.47ms step:1390/6000 train_loss:3.6445 train_time:501485ms step_avg:363.39ms step:1391/6000 train_loss:3.9169 train_time:502812ms step_avg:364.09ms step:1392/6000 train_loss:3.6303 train_time:503064ms step_avg:364.01ms step:1393/6000 train_loss:3.6190 train_time:503319ms step_avg:363.93ms step:1394/6000 train_loss:3.5947 train_time:503573ms step_avg:363.85ms step:1395/6000 train_loss:3.8749 train_time:503828ms step_avg:363.77ms step:1396/6000 train_loss:3.7662 train_time:504083ms step_avg:363.70ms step:1397/6000 train_loss:3.7615 train_time:504343ms step_avg:363.62ms step:1398/6000 train_loss:3.6284 
train_time:504593ms step_avg:363.54ms step:1399/6000 train_loss:3.6067 train_time:504848ms step_avg:363.46ms step:1400/6000 train_loss:3.6762 train_time:505103ms step_avg:363.38ms step:1401/6000 train_loss:3.6446 train_time:506430ms step_avg:364.08ms step:1402/6000 train_loss:3.6609 train_time:506681ms step_avg:364.00ms step:1403/6000 train_loss:3.6294 train_time:506936ms step_avg:363.92ms step:1404/6000 train_loss:3.8662 train_time:507190ms step_avg:363.84ms step:1405/6000 train_loss:3.6002 train_time:507445ms step_avg:363.76ms step:1406/6000 train_loss:3.6513 train_time:507700ms step_avg:363.68ms step:1407/6000 train_loss:3.6456 train_time:507955ms step_avg:363.60ms step:1408/6000 train_loss:3.5156 train_time:508210ms step_avg:363.53ms step:1409/6000 train_loss:3.6337 train_time:508465ms step_avg:363.45ms step:1410/6000 train_loss:3.6116 train_time:508720ms step_avg:363.37ms step:1411/6000 train_loss:3.6164 train_time:510047ms step_avg:364.06ms step:1412/6000 train_loss:3.7065 train_time:510299ms step_avg:363.98ms step:1413/6000 train_loss:3.6339 train_time:510554ms step_avg:363.90ms step:1414/6000 train_loss:3.6841 train_time:510809ms step_avg:363.82ms step:1415/6000 train_loss:3.6784 train_time:511063ms step_avg:363.75ms step:1416/6000 train_loss:3.7608 train_time:511319ms step_avg:363.67ms step:1417/6000 train_loss:3.5580 train_time:511574ms step_avg:363.59ms step:1418/6000 train_loss:3.6252 train_time:511828ms step_avg:363.51ms step:1419/6000 train_loss:3.7156 train_time:512082ms step_avg:363.44ms step:1420/6000 train_loss:3.7572 train_time:512337ms step_avg:363.36ms step:1421/6000 train_loss:3.7212 train_time:513664ms step_avg:364.04ms step:1422/6000 train_loss:3.7008 train_time:513917ms step_avg:363.96ms step:1423/6000 train_loss:3.6937 train_time:514170ms step_avg:363.89ms step:1424/6000 train_loss:3.6745 train_time:514425ms step_avg:363.81ms step:1425/6000 train_loss:3.6625 train_time:514680ms step_avg:363.73ms step:1426/6000 train_loss:3.5453 
train_time:514935ms step_avg:363.65ms step:1427/6000 train_loss:3.6549 train_time:515190ms step_avg:363.58ms step:1428/6000 train_loss:3.5967 train_time:515445ms step_avg:363.50ms step:1429/6000 train_loss:3.7103 train_time:515700ms step_avg:363.43ms step:1430/6000 train_loss:3.6765 train_time:515956ms step_avg:363.35ms step:1431/6000 train_loss:3.6109 train_time:517282ms step_avg:364.03ms step:1432/6000 train_loss:3.6505 train_time:517535ms step_avg:363.95ms step:1433/6000 train_loss:3.6894 train_time:517789ms step_avg:363.87ms step:1434/6000 train_loss:3.5900 train_time:518044ms step_avg:363.79ms step:1435/6000 train_loss:3.6662 train_time:518299ms step_avg:363.72ms step:1436/6000 train_loss:3.4886 train_time:518552ms step_avg:363.64ms step:1437/6000 train_loss:3.5484 train_time:518807ms step_avg:363.56ms step:1438/6000 train_loss:3.7506 train_time:519061ms step_avg:363.49ms step:1439/6000 train_loss:3.7051 train_time:519315ms step_avg:363.41ms step:1440/6000 train_loss:3.6566 train_time:519570ms step_avg:363.34ms step:1441/6000 train_loss:3.5108 train_time:520897ms step_avg:364.01ms step:1442/6000 train_loss:3.6841 train_time:521149ms step_avg:363.93ms step:1443/6000 train_loss:3.7437 train_time:521404ms step_avg:363.85ms step:1444/6000 train_loss:3.8113 train_time:521658ms step_avg:363.78ms step:1445/6000 train_loss:3.7862 train_time:521912ms step_avg:363.70ms step:1446/6000 train_loss:3.6661 train_time:522166ms step_avg:363.63ms step:1447/6000 train_loss:3.5441 train_time:522422ms step_avg:363.55ms step:1448/6000 train_loss:3.6183 train_time:522676ms step_avg:363.47ms step:1449/6000 train_loss:3.6420 train_time:522936ms step_avg:363.40ms step:1450/6000 train_loss:3.7566 train_time:523192ms step_avg:363.33ms step:1451/6000 train_loss:3.7459 train_time:524518ms step_avg:364.00ms step:1452/6000 train_loss:3.5644 train_time:524770ms step_avg:363.92ms step:1453/6000 train_loss:3.6769 train_time:525026ms step_avg:363.84ms step:1454/6000 train_loss:3.5851 
train_time:525281ms step_avg:363.77ms step:1455/6000 train_loss:3.6203 train_time:525536ms step_avg:363.69ms step:1456/6000 train_loss:3.6635 train_time:525790ms step_avg:363.62ms step:1457/6000 train_loss:3.5954 train_time:526044ms step_avg:363.54ms step:1458/6000 train_loss:3.4986 train_time:526299ms step_avg:363.47ms step:1459/6000 train_loss:3.7378 train_time:526555ms step_avg:363.39ms step:1460/6000 train_loss:3.6197 train_time:526810ms step_avg:363.32ms step:1461/6000 train_loss:3.6640 train_time:528136ms step_avg:363.98ms step:1462/6000 train_loss:3.7903 train_time:528388ms step_avg:363.90ms step:1463/6000 train_loss:3.6061 train_time:528643ms step_avg:363.83ms step:1464/6000 train_loss:3.7976 train_time:528898ms step_avg:363.75ms step:1465/6000 train_loss:3.6867 train_time:529153ms step_avg:363.68ms step:1466/6000 train_loss:3.7030 train_time:529407ms step_avg:363.60ms step:1467/6000 train_loss:3.6097 train_time:529661ms step_avg:363.53ms step:1468/6000 train_loss:3.7753 train_time:529916ms step_avg:363.45ms step:1469/6000 train_loss:3.6361 train_time:530170ms step_avg:363.38ms step:1470/6000 train_loss:3.6019 train_time:530425ms step_avg:363.30ms step:1471/6000 train_loss:3.6563 train_time:531751ms step_avg:363.96ms step:1472/6000 train_loss:3.5783 train_time:532004ms step_avg:363.89ms step:1473/6000 train_loss:3.6824 train_time:532258ms step_avg:363.81ms step:1474/6000 train_loss:3.7698 train_time:532514ms step_avg:363.74ms step:1475/6000 train_loss:3.6423 train_time:532767ms step_avg:363.66ms step:1476/6000 train_loss:3.4668 train_time:533023ms step_avg:363.59ms step:1477/6000 train_loss:3.5947 train_time:533278ms step_avg:363.52ms step:1478/6000 train_loss:3.5642 train_time:533534ms step_avg:363.44ms step:1479/6000 train_loss:3.6604 train_time:533788ms step_avg:363.37ms step:1480/6000 train_loss:3.7377 train_time:534043ms step_avg:363.29ms step:1481/6000 train_loss:3.6058 train_time:535371ms step_avg:363.95ms step:1482/6000 train_loss:3.7813 
train_time:535623ms step_avg:363.87ms step:1483/6000 train_loss:3.7085 train_time:535878ms step_avg:363.80ms step:1484/6000 train_loss:3.6123 train_time:536132ms step_avg:363.73ms step:1485/6000 train_loss:3.6002 train_time:536386ms step_avg:363.65ms step:1486/6000 train_loss:3.5946 train_time:536642ms step_avg:363.58ms step:1487/6000 train_loss:3.5759 train_time:536896ms step_avg:363.50ms step:1488/6000 train_loss:3.6595 train_time:537152ms step_avg:363.43ms step:1489/6000 train_loss:3.5658 train_time:537406ms step_avg:363.36ms step:1490/6000 train_loss:3.6569 train_time:537660ms step_avg:363.28ms step:1491/6000 train_loss:3.6013 train_time:538988ms step_avg:363.94ms step:1492/6000 train_loss:3.5210 train_time:539240ms step_avg:363.86ms step:1493/6000 train_loss:3.5908 train_time:539494ms step_avg:363.79ms step:1494/6000 train_loss:3.7744 train_time:539749ms step_avg:363.71ms step:1495/6000 train_loss:3.6270 train_time:540004ms step_avg:363.64ms step:1496/6000 train_loss:3.3895 train_time:540258ms step_avg:363.57ms step:1497/6000 train_loss:3.6859 train_time:540513ms step_avg:363.49ms step:1498/6000 train_loss:3.6404 train_time:540769ms step_avg:363.42ms step:1499/6000 train_loss:3.6957 train_time:541023ms step_avg:363.35ms step:1500/6000 train_loss:3.6523 train_time:541278ms step_avg:363.27ms step:1500/6000 val_loss:3.6278 train_time:541312ms step_avg:363.30ms step:1501/6000 train_loss:3.6272 train_time:542608ms step_avg:363.92ms step:1502/6000 train_loss:3.4315 train_time:542860ms step_avg:363.85ms step:1503/6000 train_loss:3.7007 train_time:543115ms step_avg:363.77ms step:1504/6000 train_loss:3.5742 train_time:543369ms step_avg:363.70ms step:1505/6000 train_loss:3.5817 train_time:543623ms step_avg:363.63ms step:1506/6000 train_loss:3.5422 train_time:543880ms step_avg:363.56ms step:1507/6000 train_loss:3.6292 train_time:544134ms step_avg:363.48ms step:1508/6000 train_loss:3.5481 train_time:544389ms step_avg:363.41ms step:1509/6000 train_loss:3.8603 
train_time:544645ms step_avg:363.34ms step:1510/6000 train_loss:3.5994 train_time:544900ms step_avg:363.27ms step:1511/6000 train_loss:3.6118 train_time:546227ms step_avg:363.91ms step:1512/6000 train_loss:3.7312 train_time:546627ms step_avg:363.93ms step:1513/6000 train_loss:3.7589 train_time:546882ms step_avg:363.86ms step:1514/6000 train_loss:3.6150 train_time:547136ms step_avg:363.79ms step:1515/6000 train_loss:3.4474 train_time:547390ms step_avg:363.71ms step:1516/6000 train_loss:3.5786 train_time:547645ms step_avg:363.64ms step:1517/6000 train_loss:3.5827 train_time:547900ms step_avg:363.57ms step:1518/6000 train_loss:3.6721 train_time:548155ms step_avg:363.50ms step:1519/6000 train_loss:3.5600 train_time:548409ms step_avg:363.43ms step:1520/6000 train_loss:3.8558 train_time:548816ms step_avg:363.45ms step:1521/6000 train_loss:3.5128 train_time:550142ms step_avg:364.09ms step:1522/6000 train_loss:3.5757 train_time:550396ms step_avg:364.02ms step:1523/6000 train_loss:3.7112 train_time:550652ms step_avg:363.95ms step:1524/6000 train_loss:3.5757 train_time:550906ms step_avg:363.87ms step:1525/6000 train_loss:3.6597 train_time:551161ms step_avg:363.80ms step:1526/6000 train_loss:3.6580 train_time:551415ms step_avg:363.73ms step:1527/6000 train_loss:3.6293 train_time:551670ms step_avg:363.66ms step:1528/6000 train_loss:3.6186 train_time:551924ms step_avg:363.59ms step:1529/6000 train_loss:3.7704 train_time:552179ms step_avg:363.52ms step:1530/6000 train_loss:3.7431 train_time:552435ms step_avg:363.44ms step:1531/6000 train_loss:3.5739 train_time:553761ms step_avg:364.08ms step:1532/6000 train_loss:3.5352 train_time:554013ms step_avg:364.00ms step:1533/6000 train_loss:3.7067 train_time:554268ms step_avg:363.93ms step:1534/6000 train_loss:3.6415 train_time:554522ms step_avg:363.86ms step:1535/6000 train_loss:3.6325 train_time:554776ms step_avg:363.79ms step:1536/6000 train_loss:3.6245 train_time:555031ms step_avg:363.72ms step:1537/6000 train_loss:3.5649 
train_time:555285ms step_avg:363.64ms step:1538/6000 train_loss:3.6255 train_time:555540ms step_avg:363.57ms step:1539/6000 train_loss:3.8035 train_time:555795ms step_avg:363.50ms step:1540/6000 train_loss:3.7279 train_time:556050ms step_avg:363.43ms step:1541/6000 train_loss:3.6414 train_time:557375ms step_avg:364.06ms step:1542/6000 train_loss:3.5948 train_time:557627ms step_avg:363.99ms step:1543/6000 train_loss:3.5839 train_time:557882ms step_avg:363.92ms step:1544/6000 train_loss:3.5610 train_time:558136ms step_avg:363.84ms step:1545/6000 train_loss:3.6489 train_time:558391ms step_avg:363.77ms step:1546/6000 train_loss:3.6023 train_time:558645ms step_avg:363.70ms step:1547/6000 train_loss:3.5829 train_time:558900ms step_avg:363.63ms step:1548/6000 train_loss:3.5498 train_time:559155ms step_avg:363.56ms step:1549/6000 train_loss:3.5769 train_time:559410ms step_avg:363.49ms step:1550/6000 train_loss:3.6939 train_time:559665ms step_avg:363.42ms step:1551/6000 train_loss:3.6228 train_time:560991ms step_avg:364.04ms step:1552/6000 train_loss:3.5606 train_time:561244ms step_avg:363.97ms step:1553/6000 train_loss:3.5570 train_time:561498ms step_avg:363.90ms step:1554/6000 train_loss:3.5453 train_time:561753ms step_avg:363.83ms step:1555/6000 train_loss:3.6690 train_time:562007ms step_avg:363.76ms step:1556/6000 train_loss:3.6714 train_time:562263ms step_avg:363.69ms step:1557/6000 train_loss:3.6128 train_time:562518ms step_avg:363.62ms step:1558/6000 train_loss:3.6702 train_time:562772ms step_avg:363.55ms step:1559/6000 train_loss:3.5925 train_time:563027ms step_avg:363.48ms step:1560/6000 train_loss:3.5179 train_time:563283ms step_avg:363.41ms step:1561/6000 train_loss:3.7552 train_time:564611ms step_avg:364.03ms step:1562/6000 train_loss:3.5758 train_time:564864ms step_avg:363.96ms step:1563/6000 train_loss:3.5503 train_time:565118ms step_avg:363.89ms step:1564/6000 train_loss:3.6776 train_time:565373ms step_avg:363.82ms step:1565/6000 train_loss:3.4984 
train_time:565628ms step_avg:363.75ms step:1566/6000 train_loss:3.5610 train_time:565885ms step_avg:363.68ms step:1567/6000 train_loss:3.7018 train_time:566140ms step_avg:363.61ms step:1568/6000 train_loss:3.5898 train_time:566395ms step_avg:363.54ms step:1569/6000 train_loss:3.5723 train_time:566651ms step_avg:363.47ms step:1570/6000 train_loss:3.6668 train_time:566905ms step_avg:363.40ms step:1571/6000 train_loss:3.6792 train_time:568233ms step_avg:364.02ms step:1572/6000 train_loss:3.5033 train_time:568484ms step_avg:363.95ms step:1573/6000 train_loss:3.5369 train_time:568739ms step_avg:363.88ms step:1574/6000 train_loss:3.6551 train_time:568993ms step_avg:363.81ms step:1575/6000 train_loss:3.5239 train_time:569248ms step_avg:363.74ms step:1576/6000 train_loss:3.6687 train_time:569503ms step_avg:363.67ms step:1577/6000 train_loss:3.5704 train_time:569757ms step_avg:363.60ms step:1578/6000 train_loss:3.6298 train_time:570011ms step_avg:363.53ms step:1579/6000 train_loss:3.5995 train_time:570267ms step_avg:363.46ms step:1580/6000 train_loss:3.5607 train_time:570522ms step_avg:363.39ms step:1581/6000 train_loss:3.5377 train_time:571849ms step_avg:364.00ms step:1582/6000 train_loss:3.7744 train_time:572102ms step_avg:363.93ms step:1583/6000 train_loss:3.5584 train_time:572357ms step_avg:363.86ms step:1584/6000 train_loss:3.7020 train_time:572612ms step_avg:363.79ms step:1585/6000 train_loss:3.5381 train_time:572867ms step_avg:363.72ms step:1586/6000 train_loss:3.7012 train_time:573121ms step_avg:363.66ms step:1587/6000 train_loss:3.4858 train_time:573378ms step_avg:363.59ms step:1588/6000 train_loss:3.6763 train_time:573632ms step_avg:363.52ms step:1589/6000 train_loss:3.5912 train_time:573887ms step_avg:363.45ms step:1590/6000 train_loss:3.7532 train_time:574142ms step_avg:363.38ms step:1591/6000 train_loss:3.5674 train_time:575469ms step_avg:363.99ms step:1592/6000 train_loss:3.5888 train_time:575722ms step_avg:363.92ms step:1593/6000 train_loss:3.6528 
train_time:575977ms step_avg:363.85ms step:1594/6000 train_loss:3.6261 train_time:576230ms step_avg:363.78ms step:1595/6000 train_loss:3.6020 train_time:576485ms step_avg:363.71ms step:1596/6000 train_loss:3.7449 train_time:576739ms step_avg:363.64ms step:1597/6000 train_loss:3.4770 train_time:576993ms step_avg:363.57ms step:1598/6000 train_loss:3.6368 train_time:577248ms step_avg:363.51ms step:1599/6000 train_loss:3.6807 train_time:577502ms step_avg:363.44ms step:1600/6000 train_loss:3.7260 train_time:577758ms step_avg:363.37ms step:1601/6000 train_loss:3.5726 train_time:579083ms step_avg:363.97ms step:1602/6000 train_loss:3.8751 train_time:579337ms step_avg:363.91ms step:1603/6000 train_loss:3.7666 train_time:579590ms step_avg:363.84ms step:1604/6000 train_loss:3.5371 train_time:579845ms step_avg:363.77ms step:1605/6000 train_loss:3.5863 train_time:580099ms step_avg:363.70ms step:1606/6000 train_loss:3.4634 train_time:580354ms step_avg:363.63ms step:1607/6000 train_loss:3.7869 train_time:580609ms step_avg:363.56ms step:1608/6000 train_loss:3.5812 train_time:580864ms step_avg:363.49ms step:1609/6000 train_loss:3.6124 train_time:581119ms step_avg:363.43ms step:1610/6000 train_loss:3.5612 train_time:581374ms step_avg:363.36ms step:1611/6000 train_loss:4.1849 train_time:582701ms step_avg:363.96ms step:1612/6000 train_loss:3.7956 train_time:582952ms step_avg:363.89ms step:1613/6000 train_loss:3.7117 train_time:583206ms step_avg:363.82ms step:1614/6000 train_loss:3.5838 train_time:583461ms step_avg:363.75ms step:1615/6000 train_loss:3.6229 train_time:583715ms step_avg:363.69ms step:1616/6000 train_loss:3.6119 train_time:583970ms step_avg:363.62ms step:1617/6000 train_loss:3.5626 train_time:584224ms step_avg:363.55ms step:1618/6000 train_loss:3.6495 train_time:584480ms step_avg:363.48ms step:1619/6000 train_loss:3.6065 train_time:584736ms step_avg:363.42ms step:1620/6000 train_loss:3.5018 train_time:584991ms step_avg:363.35ms step:1621/6000 train_loss:3.7709 
train_time:586318ms step_avg:363.95ms step:1622/6000 train_loss:3.6632 train_time:586569ms step_avg:363.88ms step:1623/6000 train_loss:3.4637 train_time:586823ms step_avg:363.81ms step:1624/6000 train_loss:3.5750 train_time:587079ms step_avg:363.74ms step:1625/6000 train_loss:3.5431 train_time:587333ms step_avg:363.67ms step:1625/6000 val_loss:3.6094 train_time:587368ms step_avg:363.70ms step:1626/6000 train_loss:3.6165 train_time:587593ms step_avg:363.61ms step:1627/6000 train_loss:3.5885 train_time:587848ms step_avg:363.54ms step:1628/6000 train_loss:3.5427 train_time:588103ms step_avg:363.48ms step:1629/6000 train_loss:3.6540 train_time:588360ms step_avg:363.41ms step:1630/6000 train_loss:3.5515 train_time:588617ms step_avg:363.34ms step:1631/6000 train_loss:3.6077 train_time:589944ms step_avg:363.94ms step:1632/6000 train_loss:3.4852 train_time:590199ms step_avg:363.87ms step:1633/6000 train_loss:3.4590 train_time:590453ms step_avg:363.80ms step:1634/6000 train_loss:3.6210 train_time:590707ms step_avg:363.74ms step:1635/6000 train_loss:3.5982 train_time:590962ms step_avg:363.67ms step:1636/6000 train_loss:3.5386 train_time:591218ms step_avg:363.60ms step:1637/6000 train_loss:3.6290 train_time:591473ms step_avg:363.54ms step:1638/6000 train_loss:3.6831 train_time:591728ms step_avg:363.47ms step:1639/6000 train_loss:3.7110 train_time:591983ms step_avg:363.40ms step:1640/6000 train_loss:3.8772 train_time:592237ms step_avg:363.34ms step:1641/6000 train_loss:3.7016 train_time:593564ms step_avg:363.93ms step:1642/6000 train_loss:3.6081 train_time:593818ms step_avg:363.86ms step:1643/6000 train_loss:3.6845 train_time:594072ms step_avg:363.79ms step:1644/6000 train_loss:3.5880 train_time:594327ms step_avg:363.72ms step:1645/6000 train_loss:3.6070 train_time:594582ms step_avg:363.66ms step:1646/6000 train_loss:3.6075 train_time:594837ms step_avg:363.59ms step:1647/6000 train_loss:3.3790 train_time:595091ms step_avg:363.53ms step:1648/6000 train_loss:3.6440 
train_time:595345ms step_avg:363.46ms step:1649/6000 train_loss:3.5097 train_time:595601ms step_avg:363.39ms step:1650/6000 train_loss:3.5886 train_time:595856ms step_avg:363.33ms step:1651/6000 train_loss:3.5617 train_time:597182ms step_avg:363.91ms step:1652/6000 train_loss:3.6351 train_time:597435ms step_avg:363.85ms step:1653/6000 train_loss:3.5633 train_time:597690ms step_avg:363.78ms step:1654/6000 train_loss:3.6928 train_time:597944ms step_avg:363.71ms step:1655/6000 train_loss:3.6793 train_time:598199ms step_avg:363.65ms step:1656/6000 train_loss:3.4965 train_time:598453ms step_avg:363.58ms step:1657/6000 train_loss:3.6506 train_time:598708ms step_avg:363.51ms step:1658/6000 train_loss:3.5521 train_time:598962ms step_avg:363.45ms step:1659/6000 train_loss:3.5299 train_time:599218ms step_avg:363.38ms step:1660/6000 train_loss:3.6184 train_time:599471ms step_avg:363.32ms step:1661/6000 train_loss:3.6420 train_time:600797ms step_avg:363.90ms step:1662/6000 train_loss:3.5584 train_time:601049ms step_avg:363.83ms step:1663/6000 train_loss:3.6530 train_time:601305ms step_avg:363.77ms step:1664/6000 train_loss:3.6584 train_time:601559ms step_avg:363.70ms step:1665/6000 train_loss:3.6845 train_time:601814ms step_avg:363.63ms step:1666/6000 train_loss:3.6632 train_time:602069ms step_avg:363.57ms step:1667/6000 train_loss:3.8008 train_time:602324ms step_avg:363.50ms step:1668/6000 train_loss:3.5166 train_time:602580ms step_avg:363.44ms step:1669/6000 train_loss:3.5940 train_time:602835ms step_avg:363.37ms step:1670/6000 train_loss:3.5216 train_time:603090ms step_avg:363.31ms step:1671/6000 train_loss:3.5232 train_time:604421ms step_avg:363.89ms step:1672/6000 train_loss:3.6846 train_time:604671ms step_avg:363.82ms step:1673/6000 train_loss:3.8719 train_time:604924ms step_avg:363.75ms step:1674/6000 train_loss:3.5802 train_time:605180ms step_avg:363.69ms step:1675/6000 train_loss:3.5663 train_time:605435ms step_avg:363.62ms step:1676/6000 train_loss:3.4550 
train_time:605691ms step_avg:363.56ms step:1677/6000 train_loss:3.6514 train_time:605945ms step_avg:363.49ms step:1678/6000 train_loss:3.5711 train_time:606202ms step_avg:363.43ms step:1679/6000 train_loss:3.5960 train_time:606457ms step_avg:363.37ms step:1680/6000 train_loss:3.5826 train_time:606711ms step_avg:363.30ms step:1681/6000 train_loss:3.4159 train_time:608038ms step_avg:363.88ms step:1682/6000 train_loss:3.5859 train_time:608289ms step_avg:363.81ms step:1683/6000 train_loss:3.6044 train_time:608544ms step_avg:363.74ms step:1684/6000 train_loss:3.6497 train_time:608800ms step_avg:363.68ms step:1685/6000 train_loss:3.6402 train_time:609054ms step_avg:363.61ms step:1686/6000 train_loss:3.5515 train_time:609308ms step_avg:363.55ms step:1687/6000 train_loss:3.6575 train_time:609563ms step_avg:363.48ms step:1688/6000 train_loss:3.5430 train_time:609819ms step_avg:363.42ms step:1689/6000 train_loss:3.6260 train_time:610073ms step_avg:363.36ms step:1690/6000 train_loss:3.5442 train_time:610329ms step_avg:363.29ms step:1691/6000 train_loss:3.4425 train_time:611655ms step_avg:363.86ms step:1692/6000 train_loss:3.5859 train_time:611908ms step_avg:363.80ms step:1693/6000 train_loss:3.5832 train_time:612162ms step_avg:363.73ms step:1694/6000 train_loss:3.5045 train_time:612417ms step_avg:363.67ms step:1695/6000 train_loss:3.9434 train_time:612671ms step_avg:363.60ms step:1696/6000 train_loss:3.6590 train_time:612925ms step_avg:363.54ms step:1697/6000 train_loss:3.6411 train_time:613181ms step_avg:363.47ms step:1698/6000 train_loss:3.5414 train_time:613435ms step_avg:363.41ms step:1699/6000 train_loss:3.4629 train_time:613690ms step_avg:363.34ms step:1700/6000 train_loss:3.5544 train_time:613945ms step_avg:363.28ms step:1701/6000 train_loss:3.5384 train_time:615410ms step_avg:363.93ms step:1702/6000 train_loss:3.6170 train_time:615662ms step_avg:363.87ms step:1703/6000 train_loss:3.5424 train_time:615917ms step_avg:363.80ms step:1704/6000 train_loss:3.7453 
train_time:616171ms step_avg:363.74ms step:1705/6000 train_loss:3.5148 train_time:616425ms step_avg:363.67ms step:1706/6000 train_loss:3.7329 train_time:616681ms step_avg:363.61ms step:1707/6000 train_loss:3.5815 train_time:616935ms step_avg:363.54ms step:1708/6000 train_loss:3.3496 train_time:617190ms step_avg:363.48ms step:1709/6000 train_loss:3.6940 train_time:617444ms step_avg:363.42ms step:1710/6000 train_loss:3.6149 train_time:617852ms step_avg:363.44ms step:1711/6000 train_loss:3.5994 train_time:619179ms step_avg:364.01ms step:1712/6000 train_loss:3.5866 train_time:619429ms step_avg:363.94ms step:1713/6000 train_loss:3.6199 train_time:619684ms step_avg:363.88ms step:1714/6000 train_loss:3.6530 train_time:619938ms step_avg:363.81ms step:1715/6000 train_loss:3.5734 train_time:620193ms step_avg:363.75ms step:1716/6000 train_loss:3.5740 train_time:620447ms step_avg:363.69ms step:1717/6000 train_loss:3.4112 train_time:620704ms step_avg:363.62ms step:1718/6000 train_loss:3.5441 train_time:620959ms step_avg:363.56ms step:1719/6000 train_loss:3.5739 train_time:621213ms step_avg:363.50ms step:1720/6000 train_loss:3.5165 train_time:621469ms step_avg:363.43ms step:1721/6000 train_loss:3.6658 train_time:622795ms step_avg:363.99ms step:1722/6000 train_loss:3.4874 train_time:623045ms step_avg:363.93ms step:1723/6000 train_loss:3.6206 train_time:623300ms step_avg:363.86ms step:1724/6000 train_loss:3.7084 train_time:623554ms step_avg:363.80ms step:1725/6000 train_loss:3.5631 train_time:623809ms step_avg:363.74ms step:1726/6000 train_loss:3.7856 train_time:624063ms step_avg:363.67ms step:1727/6000 train_loss:3.5767 train_time:624318ms step_avg:363.61ms step:1728/6000 train_loss:3.6348 train_time:624573ms step_avg:363.55ms step:1729/6000 train_loss:3.6080 train_time:624827ms step_avg:363.48ms step:1730/6000 train_loss:3.6190 train_time:625082ms step_avg:363.42ms step:1731/6000 train_loss:3.9975 train_time:626409ms step_avg:363.98ms step:1732/6000 train_loss:3.5948 
train_time:626661ms step_avg:363.91ms step:1733/6000 train_loss:3.7234 train_time:626916ms step_avg:363.85ms step:1734/6000 train_loss:3.5020 train_time:627170ms step_avg:363.79ms step:1735/6000 train_loss:3.5399 train_time:627425ms step_avg:363.72ms step:1736/6000 train_loss:3.5704 train_time:627681ms step_avg:363.66ms step:1737/6000 train_loss:3.5420 train_time:627934ms step_avg:363.60ms step:1738/6000 train_loss:3.6950 train_time:628190ms step_avg:363.54ms step:1739/6000 train_loss:3.5577 train_time:628445ms step_avg:363.47ms step:1740/6000 train_loss:3.6254 train_time:628701ms step_avg:363.41ms step:1741/6000 train_loss:3.6716 train_time:630028ms step_avg:363.97ms step:1742/6000 train_loss:3.4713 train_time:630281ms step_avg:363.90ms step:1743/6000 train_loss:3.3627 train_time:630535ms step_avg:363.84ms step:1744/6000 train_loss:3.3081 train_time:630790ms step_avg:363.78ms step:1745/6000 train_loss:3.5949 train_time:631044ms step_avg:363.71ms step:1746/6000 train_loss:3.6011 train_time:631298ms step_avg:363.65ms step:1747/6000 train_loss:3.5776 train_time:631552ms step_avg:363.59ms step:1748/6000 train_loss:3.5822 train_time:631807ms step_avg:363.53ms step:1749/6000 train_loss:3.8330 train_time:632062ms step_avg:363.46ms step:1750/6000 train_loss:3.5380 train_time:632317ms step_avg:363.40ms step:1750/6000 val_loss:3.5906 train_time:632351ms step_avg:363.42ms step:1751/6000 train_loss:3.6117 train_time:633646ms step_avg:363.96ms step:1752/6000 train_loss:3.5957 train_time:633899ms step_avg:363.89ms step:1753/6000 train_loss:3.2424 train_time:634153ms step_avg:363.83ms step:1754/6000 train_loss:3.3517 train_time:634409ms step_avg:363.77ms step:1755/6000 train_loss:3.4738 train_time:634664ms step_avg:363.70ms step:1756/6000 train_loss:3.3958 train_time:634918ms step_avg:363.64ms step:1757/6000 train_loss:3.5612 train_time:635173ms step_avg:363.58ms step:1758/6000 train_loss:3.4424 train_time:635428ms step_avg:363.52ms step:1759/6000 train_loss:3.4366 
train_time:635683ms step_avg:363.46ms step:1760/6000 train_loss:4.4908 train_time:635938ms step_avg:363.39ms step:1761/6000 train_loss:3.5686 train_time:637263ms step_avg:363.94ms step:1762/6000 train_loss:3.6035 train_time:637516ms step_avg:363.88ms step:1763/6000 train_loss:3.6053 train_time:637771ms step_avg:363.82ms step:1764/6000 train_loss:3.6285 train_time:638027ms step_avg:363.76ms step:1765/6000 train_loss:3.5409 train_time:638282ms step_avg:363.69ms step:1766/6000 train_loss:3.5715 train_time:638536ms step_avg:363.63ms step:1767/6000 train_loss:3.5962 train_time:638790ms step_avg:363.57ms step:1768/6000 train_loss:3.8519 train_time:639046ms step_avg:363.51ms step:1769/6000 train_loss:3.5737 train_time:639301ms step_avg:363.45ms step:1770/6000 train_loss:3.6430 train_time:639557ms step_avg:363.38ms step:1771/6000 train_loss:3.9393 train_time:640884ms step_avg:363.93ms step:1772/6000 train_loss:3.5818 train_time:641136ms step_avg:363.87ms step:1773/6000 train_loss:3.4841 train_time:641390ms step_avg:363.81ms step:1774/6000 train_loss:3.7359 train_time:641645ms step_avg:363.74ms step:1775/6000 train_loss:3.4641 train_time:641900ms step_avg:363.68ms step:1776/6000 train_loss:3.6341 train_time:642155ms step_avg:363.62ms step:1777/6000 train_loss:3.6928 train_time:642409ms step_avg:363.56ms step:1778/6000 train_loss:3.7765 train_time:642663ms step_avg:363.50ms step:1779/6000 train_loss:3.5830 train_time:642917ms step_avg:363.44ms step:1780/6000 train_loss:3.8711 train_time:643171ms step_avg:363.37ms step:1781/6000 train_loss:3.6592 train_time:644501ms step_avg:363.92ms step:1782/6000 train_loss:3.6724 train_time:644753ms step_avg:363.86ms step:1783/6000 train_loss:3.4531 train_time:645008ms step_avg:363.79ms step:1784/6000 train_loss:3.5345 train_time:645263ms step_avg:363.73ms step:1785/6000 train_loss:3.6793 train_time:645517ms step_avg:363.67ms step:1786/6000 train_loss:3.5634 train_time:645771ms step_avg:363.61ms step:1787/6000 train_loss:3.7352 
train_time:646026ms step_avg:363.55ms step:1788/6000 train_loss:3.5558 train_time:646281ms step_avg:363.49ms step:1789/6000 train_loss:3.5284 train_time:646535ms step_avg:363.43ms step:1790/6000 train_loss:3.6758 train_time:646790ms step_avg:363.37ms step:1791/6000 train_loss:3.5682 train_time:648117ms step_avg:363.91ms step:1792/6000 train_loss:3.5196 train_time:648369ms step_avg:363.84ms step:1793/6000 train_loss:3.6492 train_time:648623ms step_avg:363.78ms step:1794/6000 train_loss:3.5311 train_time:648877ms step_avg:363.72ms step:1795/6000 train_loss:3.5199 train_time:649133ms step_avg:363.66ms step:1796/6000 train_loss:3.5717 train_time:649386ms step_avg:363.60ms step:1797/6000 train_loss:3.5425 train_time:649642ms step_avg:363.54ms step:1798/6000 train_loss:3.6716 train_time:649896ms step_avg:363.48ms step:1799/6000 train_loss:3.5581 train_time:650151ms step_avg:363.42ms step:1800/6000 train_loss:3.6380 train_time:650405ms step_avg:363.35ms step:1801/6000 train_loss:3.5709 train_time:651734ms step_avg:363.89ms step:1802/6000 train_loss:3.6045 train_time:651986ms step_avg:363.83ms step:1803/6000 train_loss:3.5173 train_time:652243ms step_avg:363.77ms step:1804/6000 train_loss:3.4444 train_time:652497ms step_avg:363.71ms step:1805/6000 train_loss:3.6879 train_time:652753ms step_avg:363.65ms step:1806/6000 train_loss:3.6161 train_time:653007ms step_avg:363.59ms step:1807/6000 train_loss:3.6202 train_time:653263ms step_avg:363.53ms step:1808/6000 train_loss:3.7354 train_time:653517ms step_avg:363.47ms step:1809/6000 train_loss:3.5400 train_time:653771ms step_avg:363.41ms step:1810/6000 train_loss:3.6293 train_time:654026ms step_avg:363.35ms step:1811/6000 train_loss:3.7719 train_time:655353ms step_avg:363.88ms step:1812/6000 train_loss:3.6255 train_time:655605ms step_avg:363.82ms step:1813/6000 train_loss:3.6693 train_time:655859ms step_avg:363.76ms step:1814/6000 train_loss:3.6855 train_time:656113ms step_avg:363.70ms step:1815/6000 train_loss:3.6285 
train_time:656368ms step_avg:363.64ms step:1816/6000 train_loss:3.6685 train_time:656623ms step_avg:363.58ms step:1817/6000 train_loss:3.6180 train_time:656877ms step_avg:363.52ms step:1818/6000 train_loss:3.6741 train_time:657132ms step_avg:363.46ms step:1819/6000 train_loss:3.5990 train_time:657387ms step_avg:363.40ms step:1820/6000 train_loss:3.5849 train_time:657643ms step_avg:363.34ms step:1821/6000 train_loss:3.5408 train_time:658970ms step_avg:363.87ms step:1822/6000 train_loss:3.5271 train_time:659222ms step_avg:363.81ms step:1823/6000 train_loss:3.4415 train_time:659476ms step_avg:363.75ms step:1824/6000 train_loss:3.5995 train_time:659731ms step_avg:363.69ms step:1825/6000 train_loss:3.7205 train_time:659985ms step_avg:363.63ms step:1826/6000 train_loss:3.6795 train_time:660238ms step_avg:363.57ms step:1827/6000 train_loss:3.6586 train_time:660492ms step_avg:363.51ms step:1828/6000 train_loss:3.5234 train_time:660748ms step_avg:363.45ms step:1829/6000 train_loss:3.5654 train_time:661002ms step_avg:363.39ms step:1830/6000 train_loss:3.6842 train_time:661258ms step_avg:363.33ms step:1831/6000 train_loss:3.4685 train_time:662584ms step_avg:363.86ms step:1832/6000 train_loss:3.6195 train_time:662837ms step_avg:363.80ms step:1833/6000 train_loss:3.4860 train_time:663091ms step_avg:363.74ms step:1834/6000 train_loss:3.8102 train_time:663346ms step_avg:363.68ms step:1835/6000 train_loss:3.6464 train_time:663600ms step_avg:363.62ms step:1836/6000 train_loss:3.6302 train_time:663854ms step_avg:363.56ms step:1837/6000 train_loss:3.7501 train_time:664109ms step_avg:363.50ms step:1838/6000 train_loss:3.6155 train_time:664364ms step_avg:363.44ms step:1839/6000 train_loss:3.4857 train_time:664619ms step_avg:363.38ms step:1840/6000 train_loss:3.6099 train_time:664874ms step_avg:363.32ms step:1841/6000 train_loss:3.4965 train_time:666202ms step_avg:363.85ms step:1842/6000 train_loss:3.6082 train_time:666453ms step_avg:363.78ms step:1843/6000 train_loss:3.6698 
train_time:666707ms step_avg:363.72ms step:1844/6000 train_loss:3.4151 train_time:666963ms step_avg:363.67ms step:1845/6000 train_loss:3.5360 train_time:667217ms step_avg:363.61ms step:1846/6000 train_loss:3.5909 train_time:667472ms step_avg:363.55ms step:1847/6000 train_loss:3.5402 train_time:667728ms step_avg:363.49ms step:1848/6000 train_loss:3.4390 train_time:667982ms step_avg:363.43ms step:1849/6000 train_loss:3.7031 train_time:668237ms step_avg:363.37ms step:1850/6000 train_loss:3.4656 train_time:668491ms step_avg:363.31ms step:1851/6000 train_loss:3.5494 train_time:669818ms step_avg:363.83ms step:1852/6000 train_loss:3.5175 train_time:670069ms step_avg:363.77ms step:1853/6000 train_loss:3.7130 train_time:670325ms step_avg:363.71ms step:1854/6000 train_loss:3.6841 train_time:670579ms step_avg:363.65ms step:1855/6000 train_loss:3.5638 train_time:670834ms step_avg:363.60ms step:1856/6000 train_loss:3.5218 train_time:671089ms step_avg:363.54ms step:1857/6000 train_loss:3.5453 train_time:671343ms step_avg:363.48ms step:1858/6000 train_loss:3.7964 train_time:671598ms step_avg:363.42ms step:1859/6000 train_loss:3.6359 train_time:671852ms step_avg:363.36ms step:1860/6000 train_loss:3.5814 train_time:672107ms step_avg:363.30ms step:1861/6000 train_loss:3.6168 train_time:673435ms step_avg:363.82ms step:1862/6000 train_loss:3.5126 train_time:673685ms step_avg:363.76ms step:1863/6000 train_loss:3.5038 train_time:673941ms step_avg:363.70ms step:1864/6000 train_loss:3.5763 train_time:674195ms step_avg:363.64ms step:1865/6000 train_loss:3.6161 train_time:674449ms step_avg:363.58ms step:1866/6000 train_loss:3.3766 train_time:674704ms step_avg:363.53ms step:1867/6000 train_loss:3.5008 train_time:674959ms step_avg:363.47ms step:1868/6000 train_loss:3.4586 train_time:675213ms step_avg:363.41ms step:1869/6000 train_loss:3.4636 train_time:675467ms step_avg:363.35ms step:1870/6000 train_loss:3.6199 train_time:675722ms step_avg:363.29ms step:1871/6000 train_loss:3.6093 
train_time:677048ms step_avg:363.81ms step:1872/6000 train_loss:3.5582 train_time:677301ms step_avg:363.75ms step:1873/6000 train_loss:3.5592 train_time:677555ms step_avg:363.69ms step:1874/6000 train_loss:3.5000 train_time:677810ms step_avg:363.63ms step:1875/6000 train_loss:3.5983 train_time:678064ms step_avg:363.57ms step:1875/6000 val_loss:3.5713 train_time:678099ms step_avg:363.59ms step:1876/6000 train_loss:3.5961 train_time:678324ms step_avg:363.52ms step:1877/6000 train_loss:3.5203 train_time:678582ms step_avg:363.46ms step:1878/6000 train_loss:3.5605 train_time:678837ms step_avg:363.40ms step:1879/6000 train_loss:3.6744 train_time:679091ms step_avg:363.34ms step:1880/6000 train_loss:3.5537 train_time:679346ms step_avg:363.29ms step:1881/6000 train_loss:3.6086 train_time:680673ms step_avg:363.80ms step:1882/6000 train_loss:3.5273 train_time:680924ms step_avg:363.74ms step:1883/6000 train_loss:3.5934 train_time:681180ms step_avg:363.68ms step:1884/6000 train_loss:3.5923 train_time:681435ms step_avg:363.63ms step:1885/6000 train_loss:3.3423 train_time:681689ms step_avg:363.57ms step:1886/6000 train_loss:3.7464 train_time:681944ms step_avg:363.51ms step:1887/6000 train_loss:3.4741 train_time:682199ms step_avg:363.45ms step:1888/6000 train_loss:3.5029 train_time:682453ms step_avg:363.39ms step:1889/6000 train_loss:3.5701 train_time:682708ms step_avg:363.34ms step:1890/6000 train_loss:3.6103 train_time:683103ms step_avg:363.35ms step:1891/6000 train_loss:3.4439 train_time:684430ms step_avg:363.86ms step:1892/6000 train_loss:3.7148 train_time:684684ms step_avg:363.81ms step:1893/6000 train_loss:3.4717 train_time:684939ms step_avg:363.75ms step:1894/6000 train_loss:3.5919 train_time:685195ms step_avg:363.69ms step:1895/6000 train_loss:3.6398 train_time:685450ms step_avg:363.63ms step:1896/6000 train_loss:3.4399 train_time:685705ms step_avg:363.58ms step:1897/6000 train_loss:3.5980 train_time:685960ms step_avg:363.52ms step:1898/6000 train_loss:3.5630 
train_time:686214ms step_avg:363.46ms step:1899/6000 train_loss:3.6353 train_time:686470ms step_avg:363.40ms step:1900/6000 train_loss:3.4243 train_time:686886ms step_avg:363.43ms step:1901/6000 train_loss:3.6630 train_time:688212ms step_avg:363.94ms step:1902/6000 train_loss:3.5477 train_time:688465ms step_avg:363.88ms step:1903/6000 train_loss:3.7009 train_time:688719ms step_avg:363.82ms step:1904/6000 train_loss:3.5062 train_time:688975ms step_avg:363.77ms step:1905/6000 train_loss:3.7951 train_time:689229ms step_avg:363.71ms step:1906/6000 train_loss:3.5249 train_time:689483ms step_avg:363.65ms step:1907/6000 train_loss:3.5168 train_time:689738ms step_avg:363.59ms step:1908/6000 train_loss:3.5958 train_time:689993ms step_avg:363.54ms step:1909/6000 train_loss:3.4818 train_time:690248ms step_avg:363.48ms step:1910/6000 train_loss:3.5424 train_time:690503ms step_avg:363.42ms step:1911/6000 train_loss:3.6396 train_time:691830ms step_avg:363.93ms step:1912/6000 train_loss:3.5661 train_time:692081ms step_avg:363.87ms step:1913/6000 train_loss:3.4374 train_time:692336ms step_avg:363.81ms step:1914/6000 train_loss:3.3188 train_time:692592ms step_avg:363.76ms step:1915/6000 train_loss:3.5172 train_time:692846ms step_avg:363.70ms step:1916/6000 train_loss:3.7375 train_time:693100ms step_avg:363.64ms step:1917/6000 train_loss:3.7237 train_time:693355ms step_avg:363.58ms step:1918/6000 train_loss:3.6781 train_time:693609ms step_avg:363.53ms step:1919/6000 train_loss:3.5092 train_time:693866ms step_avg:363.47ms step:1920/6000 train_loss:3.7513 train_time:694120ms step_avg:363.41ms step:1921/6000 train_loss:3.5708 train_time:695449ms step_avg:363.92ms step:1922/6000 train_loss:3.5000 train_time:695699ms step_avg:363.86ms step:1923/6000 train_loss:3.6812 train_time:695954ms step_avg:363.80ms step:1924/6000 train_loss:3.6436 train_time:696209ms step_avg:363.75ms step:1925/6000 train_loss:3.4818 train_time:696463ms step_avg:363.69ms step:1926/6000 train_loss:3.5191 
train_time:696720ms step_avg:363.63ms step:1927/6000 train_loss:3.4262 train_time:696976ms step_avg:363.58ms step:1928/6000 train_loss:3.5350 train_time:697230ms step_avg:363.52ms step:1929/6000 train_loss:3.4051 train_time:697486ms step_avg:363.46ms step:1930/6000 train_loss:3.5068 train_time:697740ms step_avg:363.41ms step:1931/6000 train_loss:3.6419 train_time:699066ms step_avg:363.91ms step:1932/6000 train_loss:3.5164 train_time:699319ms step_avg:363.85ms step:1933/6000 train_loss:3.6518 train_time:699575ms step_avg:363.79ms step:1934/6000 train_loss:3.5282 train_time:699828ms step_avg:363.74ms step:1935/6000 train_loss:3.5755 train_time:700083ms step_avg:363.68ms step:1936/6000 train_loss:3.6136 train_time:700337ms step_avg:363.62ms step:1937/6000 train_loss:3.5686 train_time:700593ms step_avg:363.57ms step:1938/6000 train_loss:3.5931 train_time:700847ms step_avg:363.51ms step:1939/6000 train_loss:3.5200 train_time:701101ms step_avg:363.45ms step:1940/6000 train_loss:3.6158 train_time:701355ms step_avg:363.40ms step:1941/6000 train_loss:3.6526 train_time:702682ms step_avg:363.90ms step:1942/6000 train_loss:3.4875 train_time:702935ms step_avg:363.84ms step:1943/6000 train_loss:3.5190 train_time:703191ms step_avg:363.78ms step:1944/6000 train_loss:3.5903 train_time:703445ms step_avg:363.73ms step:1945/6000 train_loss:3.4333 train_time:703699ms step_avg:363.67ms step:1946/6000 train_loss:3.6992 train_time:703954ms step_avg:363.61ms step:1947/6000 train_loss:3.5747 train_time:704208ms step_avg:363.56ms step:1948/6000 train_loss:3.5605 train_time:704463ms step_avg:363.50ms step:1949/6000 train_loss:3.5598 train_time:704717ms step_avg:363.44ms step:1950/6000 train_loss:3.4377 train_time:704972ms step_avg:363.39ms step:1951/6000 train_loss:3.5559 train_time:706298ms step_avg:363.88ms step:1952/6000 train_loss:3.4030 train_time:706551ms step_avg:363.83ms step:1953/6000 train_loss:3.6118 train_time:706805ms step_avg:363.77ms step:1954/6000 train_loss:3.6113 
train_time:707060ms step_avg:363.71ms step:1955/6000 train_loss:3.5574 train_time:707315ms step_avg:363.66ms step:1956/6000 train_loss:3.4523 train_time:707570ms step_avg:363.60ms step:1957/6000 train_loss:3.5400 train_time:707824ms step_avg:363.55ms step:1958/6000 train_loss:3.7236 train_time:708079ms step_avg:363.49ms step:1959/6000 train_loss:3.6384 train_time:708333ms step_avg:363.43ms step:1960/6000 train_loss:3.6674 train_time:708588ms step_avg:363.38ms step:1961/6000 train_loss:3.4600 train_time:709914ms step_avg:363.87ms step:1962/6000 train_loss:3.5814 train_time:710166ms step_avg:363.81ms step:1963/6000 train_loss:3.6220 train_time:710419ms step_avg:363.76ms step:1964/6000 train_loss:3.5778 train_time:710675ms step_avg:363.70ms step:1965/6000 train_loss:3.4852 train_time:710929ms step_avg:363.65ms step:1966/6000 train_loss:3.8881 train_time:711185ms step_avg:363.59ms step:1967/6000 train_loss:3.5070 train_time:711439ms step_avg:363.54ms step:1968/6000 train_loss:3.5444 train_time:711695ms step_avg:363.48ms step:1969/6000 train_loss:3.5936 train_time:711950ms step_avg:363.42ms step:1970/6000 train_loss:3.5534 train_time:712204ms step_avg:363.37ms step:1971/6000 train_loss:3.4386 train_time:713531ms step_avg:363.86ms step:1972/6000 train_loss:3.4285 train_time:713784ms step_avg:363.80ms step:1973/6000 train_loss:3.5433 train_time:714038ms step_avg:363.75ms step:1974/6000 train_loss:3.5127 train_time:714293ms step_avg:363.69ms step:1975/6000 train_loss:3.4822 train_time:714548ms step_avg:363.64ms step:1976/6000 train_loss:3.6406 train_time:714804ms step_avg:363.58ms step:1977/6000 train_loss:3.5063 train_time:715059ms step_avg:363.53ms step:1978/6000 train_loss:3.8854 train_time:715314ms step_avg:363.47ms step:1979/6000 train_loss:3.5618 train_time:715568ms step_avg:363.42ms step:1980/6000 train_loss:3.5599 train_time:715823ms step_avg:363.36ms step:1981/6000 train_loss:3.5725 train_time:717150ms step_avg:363.85ms step:1982/6000 train_loss:3.5950 
train_time:717401ms step_avg:363.79ms step:1983/6000 train_loss:3.5247 train_time:717656ms step_avg:363.74ms step:1984/6000 train_loss:3.4883 train_time:717911ms step_avg:363.68ms step:1985/6000 train_loss:3.5414 train_time:718165ms step_avg:363.63ms step:1986/6000 train_loss:3.6063 train_time:718419ms step_avg:363.57ms step:1987/6000 train_loss:3.5856 train_time:718675ms step_avg:363.52ms step:1988/6000 train_loss:3.5482 train_time:718928ms step_avg:363.46ms step:1989/6000 train_loss:3.6335 train_time:719183ms step_avg:363.41ms step:1990/6000 train_loss:3.6700 train_time:719439ms step_avg:363.35ms step:1991/6000 train_loss:3.4438 train_time:720764ms step_avg:363.84ms step:1992/6000 train_loss:3.4394 train_time:721015ms step_avg:363.78ms step:1993/6000 train_loss:3.6308 train_time:721270ms step_avg:363.73ms step:1994/6000 train_loss:3.4468 train_time:721525ms step_avg:363.67ms step:1995/6000 train_loss:3.5371 train_time:721780ms step_avg:363.62ms step:1996/6000 train_loss:3.6115 train_time:722034ms step_avg:363.56ms step:1997/6000 train_loss:3.4660 train_time:722288ms step_avg:363.51ms step:1998/6000 train_loss:3.5826 train_time:722543ms step_avg:363.45ms step:1999/6000 train_loss:3.5779 train_time:722798ms step_avg:363.40ms step:2000/6000 train_loss:3.5000 train_time:723053ms step_avg:363.34ms step:2000/6000 val_loss:3.5578 train_time:723087ms step_avg:363.36ms step:2001/6000 train_loss:3.6445 train_time:724385ms step_avg:363.83ms step:2002/6000 train_loss:3.5911 train_time:724637ms step_avg:363.77ms step:2003/6000 train_loss:3.6769 train_time:724892ms step_avg:363.72ms step:2004/6000 train_loss:3.5948 train_time:725146ms step_avg:363.66ms step:2005/6000 train_loss:3.6123 train_time:725404ms step_avg:363.61ms step:2006/6000 train_loss:3.4941 train_time:725657ms step_avg:363.56ms step:2007/6000 train_loss:3.5249 train_time:725912ms step_avg:363.50ms step:2008/6000 train_loss:3.5631 train_time:726167ms step_avg:363.45ms step:2009/6000 train_loss:3.6078 
train_time:726422ms step_avg:363.39ms step:2010/6000 train_loss:3.5073 train_time:726677ms step_avg:363.34ms step:2011/6000 train_loss:3.5940 train_time:728003ms step_avg:363.82ms step:2012/6000 train_loss:3.5624 train_time:728254ms step_avg:363.76ms step:2013/6000 train_loss:3.5706 train_time:728510ms step_avg:363.71ms step:2014/6000 train_loss:3.4906 train_time:728765ms step_avg:363.66ms step:2015/6000 train_loss:3.5338 train_time:729019ms step_avg:363.60ms step:2016/6000 train_loss:3.5470 train_time:729274ms step_avg:363.55ms step:2017/6000 train_loss:3.6855 train_time:729529ms step_avg:363.49ms step:2018/6000 train_loss:3.5255 train_time:729785ms step_avg:363.44ms step:2019/6000 train_loss:3.6908 train_time:730039ms step_avg:363.38ms step:2020/6000 train_loss:3.6888 train_time:730295ms step_avg:363.33ms step:2021/6000 train_loss:3.4019 train_time:731621ms step_avg:363.81ms step:2022/6000 train_loss:3.6391 train_time:731873ms step_avg:363.75ms step:2023/6000 train_loss:3.5532 train_time:732128ms step_avg:363.70ms step:2024/6000 train_loss:3.6522 train_time:732383ms step_avg:363.65ms step:2025/6000 train_loss:3.6942 train_time:732638ms step_avg:363.59ms step:2026/6000 train_loss:3.4858 train_time:732892ms step_avg:363.54ms step:2027/6000 train_loss:3.5280 train_time:733146ms step_avg:363.48ms step:2028/6000 train_loss:3.4213 train_time:733401ms step_avg:363.43ms step:2029/6000 train_loss:3.5357 train_time:733656ms step_avg:363.38ms step:2030/6000 train_loss:3.4557 train_time:733910ms step_avg:363.32ms step:2031/6000 train_loss:3.5502 train_time:735237ms step_avg:363.80ms step:2032/6000 train_loss:3.5473 train_time:735489ms step_avg:363.74ms step:2033/6000 train_loss:3.5571 train_time:735743ms step_avg:363.69ms step:2034/6000 train_loss:3.4548 train_time:735997ms step_avg:363.63ms step:2035/6000 train_loss:3.6055 train_time:736251ms step_avg:363.58ms step:2036/6000 train_loss:3.6181 train_time:736507ms step_avg:363.53ms step:2037/6000 train_loss:3.6045 
train_time:736763ms step_avg:363.47ms step:2038/6000 train_loss:3.4732 train_time:737019ms step_avg:363.42ms step:2039/6000 train_loss:3.7340 train_time:737273ms step_avg:363.37ms step:2040/6000 train_loss:3.5614 train_time:737527ms step_avg:363.31ms step:2041/6000 train_loss:3.5862 train_time:738855ms step_avg:363.79ms step:2042/6000 train_loss:3.5358 train_time:739106ms step_avg:363.73ms step:2043/6000 train_loss:3.4295 train_time:739360ms step_avg:363.68ms step:2044/6000 train_loss:3.5641 train_time:739614ms step_avg:363.63ms step:2045/6000 train_loss:3.5605 train_time:739869ms step_avg:363.57ms step:2046/6000 train_loss:3.4222 train_time:740125ms step_avg:363.52ms step:2047/6000 train_loss:3.4927 train_time:740378ms step_avg:363.47ms step:2048/6000 train_loss:3.5728 train_time:740633ms step_avg:363.41ms step:2049/6000 train_loss:3.5172 train_time:740888ms step_avg:363.36ms step:2050/6000 train_loss:3.5592 train_time:741143ms step_avg:363.31ms step:2051/6000 train_loss:3.7185 train_time:742472ms step_avg:363.78ms step:2052/6000 train_loss:3.5843 train_time:742726ms step_avg:363.72ms step:2053/6000 train_loss:3.5342 train_time:742981ms step_avg:363.67ms step:2054/6000 train_loss:3.5129 train_time:743235ms step_avg:363.62ms step:2055/6000 train_loss:3.3863 train_time:743490ms step_avg:363.56ms step:2056/6000 train_loss:3.4878 train_time:743744ms step_avg:363.51ms step:2057/6000 train_loss:3.6627 train_time:743999ms step_avg:363.46ms step:2058/6000 train_loss:3.6861 train_time:744254ms step_avg:363.41ms step:2059/6000 train_loss:3.5513 train_time:744509ms step_avg:363.35ms step:2060/6000 train_loss:3.5968 train_time:744763ms step_avg:363.30ms step:2061/6000 train_loss:3.5838 train_time:746092ms step_avg:363.77ms step:2062/6000 train_loss:3.5291 train_time:746346ms step_avg:363.72ms step:2063/6000 train_loss:3.4405 train_time:746600ms step_avg:363.66ms step:2064/6000 train_loss:3.7494 train_time:746854ms step_avg:363.61ms step:2065/6000 train_loss:3.6095 
train_time:747109ms step_avg:363.56ms step:2066/6000 train_loss:3.5560 train_time:747363ms step_avg:363.50ms step:2067/6000 train_loss:3.6020 train_time:747619ms step_avg:363.45ms step:2068/6000 train_loss:3.5087 train_time:747873ms step_avg:363.40ms step:2069/6000 train_loss:3.5674 train_time:748127ms step_avg:363.34ms step:2070/6000 train_loss:3.6967 train_time:748383ms step_avg:363.29ms step:2071/6000 train_loss:3.7040 train_time:749709ms step_avg:363.76ms step:2072/6000 train_loss:3.5505 train_time:749960ms step_avg:363.71ms step:2073/6000 train_loss:3.5834 train_time:750215ms step_avg:363.65ms step:2074/6000 train_loss:3.4667 train_time:750470ms step_avg:363.60ms step:2075/6000 train_loss:3.9941 train_time:750725ms step_avg:363.55ms step:2076/6000 train_loss:3.4283 train_time:750979ms step_avg:363.49ms step:2077/6000 train_loss:3.5887 train_time:751233ms step_avg:363.44ms step:2078/6000 train_loss:3.4854 train_time:751488ms step_avg:363.39ms step:2079/6000 train_loss:3.4573 train_time:751883ms step_avg:363.40ms step:2080/6000 train_loss:3.5531 train_time:752138ms step_avg:363.35ms step:2081/6000 train_loss:3.8238 train_time:753466ms step_avg:363.82ms step:2082/6000 train_loss:3.4393 train_time:753720ms step_avg:363.76ms step:2083/6000 train_loss:3.7722 train_time:753974ms step_avg:363.71ms step:2084/6000 train_loss:3.4777 train_time:754229ms step_avg:363.66ms step:2085/6000 train_loss:3.4520 train_time:754483ms step_avg:363.61ms step:2086/6000 train_loss:3.7022 train_time:754738ms step_avg:363.55ms step:2087/6000 train_loss:3.6426 train_time:754992ms step_avg:363.50ms step:2088/6000 train_loss:3.6173 train_time:755246ms step_avg:363.45ms step:2089/6000 train_loss:3.6734 train_time:755501ms step_avg:363.40ms step:2090/6000 train_loss:3.5995 train_time:755920ms step_avg:363.42ms step:2091/6000 train_loss:3.5864 train_time:757246ms step_avg:363.89ms step:2092/6000 train_loss:3.5365 train_time:757499ms step_avg:363.83ms step:2093/6000 train_loss:3.6129 
train_time:757752ms step_avg:363.78ms step:2094/6000 train_loss:3.5010 train_time:758007ms step_avg:363.73ms step:2095/6000 train_loss:3.3117 train_time:758262ms step_avg:363.67ms step:2096/6000 train_loss:3.5336 train_time:758516ms step_avg:363.62ms step:2097/6000 train_loss:3.7082 train_time:758772ms step_avg:363.57ms step:2098/6000 train_loss:3.5343 train_time:759026ms step_avg:363.52ms step:2099/6000 train_loss:3.4214 train_time:759282ms step_avg:363.47ms step:2100/6000 train_loss:3.5248 train_time:759535ms step_avg:363.41ms step:2101/6000 train_loss:3.4901 train_time:760862ms step_avg:363.87ms step:2102/6000 train_loss:3.6209 train_time:761113ms step_avg:363.82ms step:2103/6000 train_loss:3.4586 train_time:761367ms step_avg:363.77ms step:2104/6000 train_loss:3.4156 train_time:761623ms step_avg:363.72ms step:2105/6000 train_loss:3.6854 train_time:761877ms step_avg:363.66ms step:2106/6000 train_loss:3.4211 train_time:762132ms step_avg:363.61ms step:2107/6000 train_loss:3.8108 train_time:762387ms step_avg:363.56ms step:2108/6000 train_loss:3.6442 train_time:762643ms step_avg:363.51ms step:2109/6000 train_loss:3.5582 train_time:762897ms step_avg:363.46ms step:2110/6000 train_loss:3.5674 train_time:763152ms step_avg:363.41ms step:2111/6000 train_loss:3.3985 train_time:764479ms step_avg:363.86ms step:2112/6000 train_loss:3.8802 train_time:764729ms step_avg:363.81ms step:2113/6000 train_loss:3.5765 train_time:764985ms step_avg:363.76ms step:2114/6000 train_loss:3.4975 train_time:765239ms step_avg:363.71ms step:2115/6000 train_loss:3.6157 train_time:765495ms step_avg:363.66ms step:2116/6000 train_loss:3.5711 train_time:765749ms step_avg:363.60ms step:2117/6000 train_loss:3.5568 train_time:766006ms step_avg:363.55ms step:2118/6000 train_loss:3.6080 train_time:766259ms step_avg:363.50ms step:2119/6000 train_loss:3.4689 train_time:766515ms step_avg:363.45ms step:2120/6000 train_loss:3.5296 train_time:766770ms step_avg:363.40ms step:2121/6000 train_loss:3.2408 
train_time:768098ms step_avg:363.86ms step:2122/6000 train_loss:3.4372 train_time:768350ms step_avg:363.80ms step:2123/6000 train_loss:3.5967 train_time:768605ms step_avg:363.75ms step:2124/6000 train_loss:3.5165 train_time:768859ms step_avg:363.70ms step:2125/6000 train_loss:3.6599 train_time:769115ms step_avg:363.65ms step:2125/6000 val_loss:3.5447 train_time:769149ms step_avg:363.66ms step:2126/6000 train_loss:3.5290 train_time:769373ms step_avg:363.60ms step:2127/6000 train_loss:3.6518 train_time:769628ms step_avg:363.55ms step:2128/6000 train_loss:3.6233 train_time:769884ms step_avg:363.50ms step:2129/6000 train_loss:3.5126 train_time:770138ms step_avg:363.44ms step:2130/6000 train_loss:3.4654 train_time:770392ms step_avg:363.39ms step:2131/6000 train_loss:3.4937 train_time:771720ms step_avg:363.85ms step:2132/6000 train_loss:3.6524 train_time:771971ms step_avg:363.79ms step:2133/6000 train_loss:3.5324 train_time:772227ms step_avg:363.74ms step:2134/6000 train_loss:3.4356 train_time:772482ms step_avg:363.69ms step:2135/6000 train_loss:3.5072 train_time:772737ms step_avg:363.64ms step:2136/6000 train_loss:3.6218 train_time:772991ms step_avg:363.59ms step:2137/6000 train_loss:3.6314 train_time:773246ms step_avg:363.54ms step:2138/6000 train_loss:3.5841 train_time:773501ms step_avg:363.49ms step:2139/6000 train_loss:3.5660 train_time:773756ms step_avg:363.44ms step:2140/6000 train_loss:3.5545 train_time:774010ms step_avg:363.39ms step:2141/6000 train_loss:3.6416 train_time:775338ms step_avg:363.84ms step:2142/6000 train_loss:3.9386 train_time:775590ms step_avg:363.79ms step:2143/6000 train_loss:3.4724 train_time:775845ms step_avg:363.73ms step:2144/6000 train_loss:3.5042 train_time:776098ms step_avg:363.68ms step:2145/6000 train_loss:3.5399 train_time:776354ms step_avg:363.63ms step:2146/6000 train_loss:3.6649 train_time:776609ms step_avg:363.58ms step:2147/6000 train_loss:3.6009 train_time:776865ms step_avg:363.53ms step:2148/6000 train_loss:4.0030 
train_time:777120ms step_avg:363.48ms step:2149/6000 train_loss:3.5294 train_time:777375ms step_avg:363.43ms step:2150/6000 train_loss:3.5023 train_time:777630ms step_avg:363.38ms step:2151/6000 train_loss:3.5611 train_time:778957ms step_avg:363.83ms step:2152/6000 train_loss:3.5942 train_time:779209ms step_avg:363.78ms step:2153/6000 train_loss:3.5484 train_time:779465ms step_avg:363.73ms step:2154/6000 train_loss:3.4808 train_time:779719ms step_avg:363.67ms step:2155/6000 train_loss:3.6968 train_time:779973ms step_avg:363.62ms step:2156/6000 train_loss:3.3149 train_time:780227ms step_avg:363.57ms step:2157/6000 train_loss:3.4766 train_time:780482ms step_avg:363.52ms step:2158/6000 train_loss:3.6140 train_time:780737ms step_avg:363.47ms step:2159/6000 train_loss:3.5539 train_time:780991ms step_avg:363.42ms step:2160/6000 train_loss:3.7141 train_time:781248ms step_avg:363.37ms step:2161/6000 train_loss:3.6273 train_time:782576ms step_avg:363.82ms step:2162/6000 train_loss:3.5528 train_time:782827ms step_avg:363.77ms step:2163/6000 train_loss:3.5193 train_time:783082ms step_avg:363.72ms step:2164/6000 train_loss:3.5198 train_time:783337ms step_avg:363.67ms step:2165/6000 train_loss:3.6036 train_time:783591ms step_avg:363.62ms step:2166/6000 train_loss:3.6218 train_time:783847ms step_avg:363.57ms step:2167/6000 train_loss:3.5597 train_time:784101ms step_avg:363.51ms step:2168/6000 train_loss:3.4464 train_time:784355ms step_avg:363.46ms step:2169/6000 train_loss:3.5319 train_time:784610ms step_avg:363.41ms step:2170/6000 train_loss:3.5809 train_time:784866ms step_avg:363.36ms step:2171/6000 train_loss:3.6937 train_time:786191ms step_avg:363.81ms step:2172/6000 train_loss:3.4971 train_time:786444ms step_avg:363.76ms step:2173/6000 train_loss:3.4821 train_time:786699ms step_avg:363.71ms step:2174/6000 train_loss:3.4923 train_time:786952ms step_avg:363.66ms step:2175/6000 train_loss:3.5476 train_time:787207ms step_avg:363.61ms step:2176/6000 train_loss:3.5057 
train_time:787463ms step_avg:363.56ms step:2177/6000 train_loss:3.4751 train_time:787718ms step_avg:363.51ms step:2178/6000 train_loss:3.6990 train_time:787972ms step_avg:363.46ms step:2179/6000 train_loss:3.5204 train_time:788227ms step_avg:363.41ms step:2180/6000 train_loss:3.5249 train_time:788481ms step_avg:363.36ms step:2181/6000 train_loss:3.5973 train_time:789808ms step_avg:363.80ms step:2182/6000 train_loss:3.5646 train_time:790061ms step_avg:363.75ms step:2183/6000 train_loss:3.5474 train_time:790315ms step_avg:363.70ms step:2184/6000 train_loss:3.4371 train_time:790571ms step_avg:363.65ms step:2185/6000 train_loss:3.6034 train_time:790826ms step_avg:363.60ms step:2186/6000 train_loss:3.7834 train_time:791082ms step_avg:363.55ms step:2187/6000 train_loss:3.4123 train_time:791336ms step_avg:363.50ms step:2188/6000 train_loss:3.4594 train_time:791591ms step_avg:363.45ms step:2189/6000 train_loss:3.3132 train_time:791846ms step_avg:363.40ms step:2190/6000 train_loss:3.4602 train_time:792100ms step_avg:363.35ms step:2191/6000 train_loss:3.6094 train_time:793427ms step_avg:363.79ms step:2192/6000 train_loss:3.5505 train_time:793680ms step_avg:363.74ms step:2193/6000 train_loss:3.7721 train_time:793934ms step_avg:363.69ms step:2194/6000 train_loss:3.5450 train_time:794188ms step_avg:363.64ms step:2195/6000 train_loss:3.6052 train_time:794443ms step_avg:363.59ms step:2196/6000 train_loss:3.5436 train_time:794697ms step_avg:363.54ms step:2197/6000 train_loss:3.4595 train_time:794952ms step_avg:363.49ms step:2198/6000 train_loss:3.5464 train_time:795206ms step_avg:363.44ms step:2199/6000 train_loss:3.4964 train_time:795462ms step_avg:363.39ms step:2200/6000 train_loss:3.4891 train_time:795716ms step_avg:363.34ms step:2201/6000 train_loss:3.5455 train_time:797042ms step_avg:363.78ms step:2202/6000 train_loss:3.5297 train_time:797294ms step_avg:363.73ms step:2203/6000 train_loss:3.5040 train_time:797549ms step_avg:363.68ms step:2204/6000 train_loss:4.0093 
train_time:797803ms step_avg:363.63ms step:2205/6000 train_loss:3.4197 train_time:798059ms step_avg:363.58ms step:2206/6000 train_loss:3.5467 train_time:798313ms step_avg:363.53ms step:2207/6000 train_loss:3.5568 train_time:798568ms step_avg:363.48ms step:2208/6000 train_loss:3.5762 train_time:798823ms step_avg:363.43ms step:2209/6000 train_loss:3.4664 train_time:799077ms step_avg:363.38ms step:2210/6000 train_loss:3.5509 train_time:799332ms step_avg:363.33ms step:2211/6000 train_loss:3.5596 train_time:800659ms step_avg:363.77ms step:2212/6000 train_loss:3.5543 train_time:800911ms step_avg:363.72ms step:2213/6000 train_loss:3.5843 train_time:801167ms step_avg:363.67ms step:2214/6000 train_loss:3.4421 train_time:801421ms step_avg:363.62ms step:2215/6000 train_loss:3.5074 train_time:801677ms step_avg:363.57ms step:2216/6000 train_loss:3.6475 train_time:801933ms step_avg:363.52ms step:2217/6000 train_loss:3.5935 train_time:802186ms step_avg:363.47ms step:2218/6000 train_loss:3.5543 train_time:802442ms step_avg:363.42ms step:2219/6000 train_loss:3.5651 train_time:802696ms step_avg:363.38ms step:2220/6000 train_loss:3.4719 train_time:802952ms step_avg:363.33ms step:2221/6000 train_loss:3.7343 train_time:804279ms step_avg:363.76ms step:2222/6000 train_loss:3.6209 train_time:804532ms step_avg:363.71ms step:2223/6000 train_loss:3.6468 train_time:804786ms step_avg:363.66ms step:2224/6000 train_loss:3.5281 train_time:805042ms step_avg:363.61ms step:2225/6000 train_loss:3.6509 train_time:805296ms step_avg:363.56ms step:2226/6000 train_loss:3.4038 train_time:805551ms step_avg:363.52ms step:2227/6000 train_loss:3.6659 train_time:805805ms step_avg:363.47ms step:2228/6000 train_loss:3.6051 train_time:806061ms step_avg:363.42ms step:2229/6000 train_loss:3.4104 train_time:806315ms step_avg:363.37ms step:2230/6000 train_loss:3.7563 train_time:806569ms step_avg:363.32ms step:2231/6000 train_loss:3.4583 train_time:807895ms step_avg:363.75ms step:2232/6000 train_loss:3.9202 
train_time:808147ms step_avg:363.70ms step:2233/6000 train_loss:3.6051 train_time:808401ms step_avg:363.65ms step:2234/6000 train_loss:3.5414 train_time:808656ms step_avg:363.60ms step:2235/6000 train_loss:3.5806 train_time:808910ms step_avg:363.56ms step:2236/6000 train_loss:3.3635 train_time:809167ms step_avg:363.51ms step:2237/6000 train_loss:3.3614 train_time:809421ms step_avg:363.46ms step:2238/6000 train_loss:3.5844 train_time:809676ms step_avg:363.41ms step:2239/6000 train_loss:3.6823 train_time:809932ms step_avg:363.36ms step:2240/6000 train_loss:3.4022 train_time:810186ms step_avg:363.31ms step:2241/6000 train_loss:3.4760 train_time:811512ms step_avg:363.74ms step:2242/6000 train_loss:3.6468 train_time:811766ms step_avg:363.69ms step:2243/6000 train_loss:3.6228 train_time:812021ms step_avg:363.65ms step:2244/6000 train_loss:3.4777 train_time:812275ms step_avg:363.60ms step:2245/6000 train_loss:3.5434 train_time:812530ms step_avg:363.55ms step:2246/6000 train_loss:3.5739 train_time:812785ms step_avg:363.50ms step:2247/6000 train_loss:3.4044 train_time:813040ms step_avg:363.45ms step:2248/6000 train_loss:3.4200 train_time:813294ms step_avg:363.40ms step:2249/6000 train_loss:3.6746 train_time:813548ms step_avg:363.35ms step:2250/6000 train_loss:3.4098 train_time:813803ms step_avg:363.30ms step:2250/6000 val_loss:3.5331 train_time:813838ms step_avg:363.32ms step:2251/6000 train_loss:3.4039 train_time:815131ms step_avg:363.74ms step:2252/6000 train_loss:3.4793 train_time:815384ms step_avg:363.69ms step:2253/6000 train_loss:3.4610 train_time:815638ms step_avg:363.64ms step:2254/6000 train_loss:3.5034 train_time:815893ms step_avg:363.59ms step:2255/6000 train_loss:3.5623 train_time:816149ms step_avg:363.54ms step:2256/6000 train_loss:3.4390 train_time:816403ms step_avg:363.49ms step:2257/6000 train_loss:3.7183 train_time:816673ms step_avg:363.45ms step:2258/6000 train_loss:3.6021 train_time:816927ms step_avg:363.40ms step:2259/6000 train_loss:3.9095 
train_time:817184ms step_avg:363.35ms step:2260/6000 train_loss:3.6016 train_time:817440ms step_avg:363.31ms step:2261/6000 train_loss:3.6499 train_time:818765ms step_avg:363.73ms step:2262/6000 train_loss:3.5598 train_time:819019ms step_avg:363.69ms step:2263/6000 train_loss:3.5623 train_time:819274ms step_avg:363.64ms step:2264/6000 train_loss:3.3146 train_time:819528ms step_avg:363.59ms step:2265/6000 train_loss:3.4447 train_time:819783ms step_avg:363.54ms step:2266/6000 train_loss:3.6631 train_time:820038ms step_avg:363.49ms step:2267/6000 train_loss:3.3946 train_time:820292ms step_avg:363.44ms step:2268/6000 train_loss:3.4662 train_time:820676ms step_avg:363.45ms step:2269/6000 train_loss:3.4412 train_time:820930ms step_avg:363.40ms step:2270/6000 train_loss:3.4073 train_time:821186ms step_avg:363.36ms step:2271/6000 train_loss:3.8099 train_time:822512ms step_avg:363.78ms step:2272/6000 train_loss:3.4604 train_time:822764ms step_avg:363.73ms step:2273/6000 train_loss:3.4704 train_time:823018ms step_avg:363.68ms step:2274/6000 train_loss:3.5501 train_time:823272ms step_avg:363.64ms step:2275/6000 train_loss:3.5044 train_time:823527ms step_avg:363.59ms step:2276/6000 train_loss:3.5145 train_time:823784ms step_avg:363.54ms step:2277/6000 train_loss:3.3987 train_time:824038ms step_avg:363.49ms step:2278/6000 train_loss:3.5045 train_time:824293ms step_avg:363.44ms step:2279/6000 train_loss:3.6331 train_time:824547ms step_avg:363.40ms step:2280/6000 train_loss:3.4279 train_time:824956ms step_avg:363.42ms step:2281/6000 train_loss:3.4908 train_time:826283ms step_avg:363.84ms step:2282/6000 train_loss:3.5076 train_time:826534ms step_avg:363.79ms step:2283/6000 train_loss:3.6493 train_time:826789ms step_avg:363.74ms step:2284/6000 train_loss:3.5191 train_time:827044ms step_avg:363.70ms step:2285/6000 train_loss:3.5342 train_time:827299ms step_avg:363.65ms step:2286/6000 train_loss:3.5444 train_time:827554ms step_avg:363.60ms step:2287/6000 train_loss:3.5384 
train_time:827809ms step_avg:363.55ms step:2288/6000 train_loss:3.4891 train_time:828063ms step_avg:363.50ms step:2289/6000 train_loss:3.6214 train_time:828318ms step_avg:363.46ms step:2290/6000 train_loss:3.6035 train_time:828574ms step_avg:363.41ms step:2291/6000 train_loss:3.4779 train_time:829900ms step_avg:363.83ms step:2292/6000 train_loss:3.8191 train_time:830152ms step_avg:363.78ms step:2293/6000 train_loss:3.4820 train_time:830405ms step_avg:363.73ms step:2294/6000 train_loss:3.4323 train_time:830660ms step_avg:363.69ms step:2295/6000 train_loss:3.6143 train_time:830914ms step_avg:363.64ms step:2296/6000 train_loss:3.5612 train_time:831169ms step_avg:363.59ms step:2297/6000 train_loss:3.6175 train_time:831423ms step_avg:363.54ms step:2298/6000 train_loss:3.9262 train_time:831678ms step_avg:363.50ms step:2299/6000 train_loss:3.4408 train_time:831932ms step_avg:363.45ms step:2300/6000 train_loss:3.4431 train_time:832187ms step_avg:363.40ms step:2301/6000 train_loss:3.7903 train_time:833513ms step_avg:363.82ms step:2302/6000 train_loss:3.5121 train_time:833766ms step_avg:363.77ms step:2303/6000 train_loss:3.5146 train_time:834020ms step_avg:363.72ms step:2304/6000 train_loss:3.5104 train_time:834275ms step_avg:363.68ms step:2305/6000 train_loss:3.4384 train_time:834530ms step_avg:363.63ms step:2306/6000 train_loss:3.6013 train_time:834784ms step_avg:363.58ms step:2307/6000 train_loss:3.4504 train_time:835039ms step_avg:363.53ms step:2308/6000 train_loss:3.4793 train_time:835295ms step_avg:363.49ms step:2309/6000 train_loss:3.6095 train_time:835549ms step_avg:363.44ms step:2310/6000 train_loss:3.5689 train_time:835804ms step_avg:363.39ms step:2311/6000 train_loss:3.4356 train_time:837132ms step_avg:363.81ms step:2312/6000 train_loss:3.5512 train_time:837383ms step_avg:363.76ms step:2313/6000 train_loss:3.6771 train_time:837637ms step_avg:363.72ms step:2314/6000 train_loss:3.4932 train_time:837892ms step_avg:363.67ms step:2315/6000 train_loss:3.4206 
train_time:838146ms step_avg:363.62ms step:2316/6000 train_loss:3.5125 train_time:838402ms step_avg:363.57ms step:2317/6000 train_loss:3.3908 train_time:838657ms step_avg:363.53ms step:2318/6000 train_loss:3.4962 train_time:838911ms step_avg:363.48ms step:2319/6000 train_loss:3.5194 train_time:839166ms step_avg:363.43ms step:2320/6000 train_loss:3.3596 train_time:839422ms step_avg:363.39ms step:2321/6000 train_loss:3.5018 train_time:840748ms step_avg:363.80ms step:2322/6000 train_loss:3.5562 train_time:841002ms step_avg:363.76ms step:2323/6000 train_loss:3.4600 train_time:841257ms step_avg:363.71ms step:2324/6000 train_loss:3.5099 train_time:841511ms step_avg:363.66ms step:2325/6000 train_loss:3.4362 train_time:841765ms step_avg:363.61ms step:2326/6000 train_loss:3.5738 train_time:842021ms step_avg:363.57ms step:2327/6000 train_loss:3.5794 train_time:842275ms step_avg:363.52ms step:2328/6000 train_loss:3.3501 train_time:842531ms step_avg:363.47ms step:2329/6000 train_loss:3.4684 train_time:842785ms step_avg:363.43ms step:2330/6000 train_loss:3.4984 train_time:843040ms step_avg:363.38ms step:2331/6000 train_loss:3.4612 train_time:844367ms step_avg:363.79ms step:2332/6000 train_loss:3.6546 train_time:844620ms step_avg:363.75ms step:2333/6000 train_loss:3.5285 train_time:844874ms step_avg:363.70ms step:2334/6000 train_loss:3.5089 train_time:845129ms step_avg:363.65ms step:2335/6000 train_loss:3.5849 train_time:845383ms step_avg:363.61ms step:2336/6000 train_loss:3.4280 train_time:845637ms step_avg:363.56ms step:2337/6000 train_loss:3.5729 train_time:845892ms step_avg:363.51ms step:2338/6000 train_loss:3.5347 train_time:846148ms step_avg:363.47ms step:2339/6000 train_loss:3.4900 train_time:846402ms step_avg:363.42ms step:2340/6000 train_loss:3.5587 train_time:846657ms step_avg:363.37ms step:2341/6000 train_loss:3.6154 train_time:847983ms step_avg:363.79ms step:2342/6000 train_loss:3.4801 train_time:848236ms step_avg:363.74ms step:2343/6000 train_loss:3.4882 
train_time:848490ms step_avg:363.69ms step:2344/6000 train_loss:3.5503 train_time:848745ms step_avg:363.64ms step:2345/6000 train_loss:3.5024 train_time:849000ms step_avg:363.60ms step:2346/6000 train_loss:3.6214 train_time:849254ms step_avg:363.55ms step:2347/6000 train_loss:3.5287 train_time:849508ms step_avg:363.50ms step:2348/6000 train_loss:3.6309 train_time:849763ms step_avg:363.46ms step:2349/6000 train_loss:3.5938 train_time:850017ms step_avg:363.41ms step:2350/6000 train_loss:3.6353 train_time:850272ms step_avg:363.36ms step:2351/6000 train_loss:3.3211 train_time:851599ms step_avg:363.78ms step:2352/6000 train_loss:3.4451 train_time:851850ms step_avg:363.73ms step:2353/6000 train_loss:3.4332 train_time:852104ms step_avg:363.68ms step:2354/6000 train_loss:3.6526 train_time:852359ms step_avg:363.63ms step:2355/6000 train_loss:3.4558 train_time:852613ms step_avg:363.59ms step:2356/6000 train_loss:3.4395 train_time:852868ms step_avg:363.54ms step:2357/6000 train_loss:3.5968 train_time:853122ms step_avg:363.49ms step:2358/6000 train_loss:3.4527 train_time:853378ms step_avg:363.45ms step:2359/6000 train_loss:3.5592 train_time:853632ms step_avg:363.40ms step:2360/6000 train_loss:3.4588 train_time:853888ms step_avg:363.36ms step:2361/6000 train_loss:3.4719 train_time:855214ms step_avg:363.77ms step:2362/6000 train_loss:3.5153 train_time:855467ms step_avg:363.72ms step:2363/6000 train_loss:3.5677 train_time:855722ms step_avg:363.67ms step:2364/6000 train_loss:3.5039 train_time:855977ms step_avg:363.63ms step:2365/6000 train_loss:3.9410 train_time:856230ms step_avg:363.58ms step:2366/6000 train_loss:3.5709 train_time:856485ms step_avg:363.53ms step:2367/6000 train_loss:3.7175 train_time:856740ms step_avg:363.49ms step:2368/6000 train_loss:3.5432 train_time:856995ms step_avg:363.44ms step:2369/6000 train_loss:3.5445 train_time:857249ms step_avg:363.40ms step:2370/6000 train_loss:3.5759 train_time:857506ms step_avg:363.35ms step:2371/6000 train_loss:3.4574 
train_time:858832ms step_avg:363.76ms step:2372/6000 train_loss:3.6908 train_time:859085ms step_avg:363.71ms step:2373/6000 train_loss:3.5338 train_time:859340ms step_avg:363.66ms step:2374/6000 train_loss:4.0917 train_time:859594ms step_avg:363.62ms step:2375/6000 train_loss:3.5097 train_time:859849ms step_avg:363.57ms step:2375/6000 val_loss:3.5227 train_time:859883ms step_avg:363.59ms step:2376/6000 train_loss:3.4237 train_time:860106ms step_avg:363.53ms step:2377/6000 train_loss:3.5798 train_time:860362ms step_avg:363.48ms step:2378/6000 train_loss:3.5466 train_time:860617ms step_avg:363.44ms step:2379/6000 train_loss:3.5645 train_time:860872ms step_avg:363.39ms step:2380/6000 train_loss:3.5539 train_time:861129ms step_avg:363.35ms step:2381/6000 train_loss:3.4462 train_time:862454ms step_avg:363.75ms step:2382/6000 train_loss:3.5412 train_time:862708ms step_avg:363.70ms step:2383/6000 train_loss:3.5629 train_time:862962ms step_avg:363.66ms step:2384/6000 train_loss:3.5132 train_time:863218ms step_avg:363.61ms step:2385/6000 train_loss:3.4352 train_time:863472ms step_avg:363.57ms step:2386/6000 train_loss:3.5513 train_time:863725ms step_avg:363.52ms step:2387/6000 train_loss:3.5104 train_time:863980ms step_avg:363.47ms step:2388/6000 train_loss:3.5103 train_time:864236ms step_avg:363.43ms step:2389/6000 train_loss:3.5413 train_time:864491ms step_avg:363.38ms step:2390/6000 train_loss:3.5302 train_time:864746ms step_avg:363.34ms step:2391/6000 train_loss:3.5227 train_time:866073ms step_avg:363.74ms step:2392/6000 train_loss:3.4045 train_time:866325ms step_avg:363.70ms step:2393/6000 train_loss:3.6238 train_time:866580ms step_avg:363.65ms step:2394/6000 train_loss:3.4616 train_time:866834ms step_avg:363.60ms step:2395/6000 train_loss:3.5606 train_time:867088ms step_avg:363.56ms step:2396/6000 train_loss:3.6721 train_time:867343ms step_avg:363.51ms step:2397/6000 train_loss:3.6966 train_time:867598ms step_avg:363.47ms step:2398/6000 train_loss:3.6393 
train_time:867852ms step_avg:363.42ms step:2399/6000 train_loss:3.6069 train_time:868111ms step_avg:363.38ms step:2400/6000 train_loss:3.4819 train_time:868365ms step_avg:363.33ms step:2401/6000 train_loss:3.4845 train_time:869693ms step_avg:363.74ms step:2402/6000 train_loss:3.5928 train_time:869945ms step_avg:363.69ms step:2403/6000 train_loss:3.4267 train_time:870199ms step_avg:363.64ms step:2404/6000 train_loss:3.5643 train_time:870453ms step_avg:363.60ms step:2405/6000 train_loss:3.7797 train_time:870709ms step_avg:363.55ms step:2406/6000 train_loss:3.5061 train_time:870963ms step_avg:363.51ms step:2407/6000 train_loss:3.6548 train_time:871218ms step_avg:363.46ms step:2408/6000 train_loss:3.5100 train_time:871472ms step_avg:363.42ms step:2409/6000 train_loss:3.4484 train_time:871729ms step_avg:363.37ms step:2410/6000 train_loss:3.5764 train_time:871983ms step_avg:363.33ms step:2411/6000 train_loss:3.3802 train_time:873309ms step_avg:363.73ms step:2412/6000 train_loss:3.8009 train_time:873561ms step_avg:363.68ms step:2413/6000 train_loss:3.4857 train_time:873817ms step_avg:363.64ms step:2414/6000 train_loss:3.5618 train_time:874071ms step_avg:363.59ms step:2415/6000 train_loss:3.4822 train_time:874326ms step_avg:363.54ms step:2416/6000 train_loss:3.5551 train_time:874579ms step_avg:363.50ms step:2417/6000 train_loss:3.3675 train_time:874834ms step_avg:363.45ms step:2418/6000 train_loss:3.3007 train_time:875089ms step_avg:363.41ms step:2419/6000 train_loss:3.5920 train_time:875343ms step_avg:363.36ms step:2420/6000 train_loss:3.4745 train_time:875598ms step_avg:363.32ms step:2421/6000 train_loss:3.5094 train_time:876925ms step_avg:363.72ms step:2422/6000 train_loss:3.6092 train_time:877177ms step_avg:363.67ms step:2423/6000 train_loss:3.6553 train_time:877432ms step_avg:363.63ms step:2424/6000 train_loss:3.4757 train_time:877687ms step_avg:363.58ms step:2425/6000 train_loss:3.5616 train_time:877942ms step_avg:363.54ms step:2426/6000 train_loss:3.5655 
train_time:878198ms step_avg:363.49ms step:2427/6000 train_loss:3.4904 train_time:878452ms step_avg:363.45ms step:2428/6000 train_loss:3.4422 train_time:878708ms step_avg:363.40ms step:2429/6000 train_loss:3.5658 train_time:878961ms step_avg:363.36ms step:2430/6000 train_loss:3.4610 train_time:879217ms step_avg:363.31ms step:2431/6000 train_loss:3.5177 train_time:880543ms step_avg:363.71ms step:2432/6000 train_loss:3.5766 train_time:880793ms step_avg:363.66ms step:2433/6000 train_loss:3.5312 train_time:881049ms step_avg:363.62ms step:2434/6000 train_loss:3.4123 train_time:881303ms step_avg:363.57ms step:2435/6000 train_loss:3.3753 train_time:881557ms step_avg:363.53ms step:2436/6000 train_loss:3.5423 train_time:881812ms step_avg:363.48ms step:2437/6000 train_loss:3.4024 train_time:882067ms step_avg:363.44ms step:2438/6000 train_loss:3.4803 train_time:882321ms step_avg:363.39ms step:2439/6000 train_loss:3.5640 train_time:882575ms step_avg:363.35ms step:2440/6000 train_loss:3.4898 train_time:882831ms step_avg:363.30ms step:2441/6000 train_loss:3.5792 train_time:884157ms step_avg:363.70ms step:2442/6000 train_loss:3.4604 train_time:884411ms step_avg:363.66ms step:2443/6000 train_loss:3.5245 train_time:884665ms step_avg:363.61ms step:2444/6000 train_loss:3.4008 train_time:884920ms step_avg:363.57ms step:2445/6000 train_loss:3.4137 train_time:885174ms step_avg:363.52ms step:2446/6000 train_loss:3.5809 train_time:885429ms step_avg:363.48ms step:2447/6000 train_loss:3.4316 train_time:885683ms step_avg:363.43ms step:2448/6000 train_loss:3.5097 train_time:885938ms step_avg:363.39ms step:2449/6000 train_loss:3.6752 train_time:886192ms step_avg:363.34ms step:2450/6000 train_loss:3.5039 train_time:886448ms step_avg:363.30ms step:2451/6000 train_loss:3.5774 train_time:887775ms step_avg:363.69ms step:2452/6000 train_loss:3.4786 train_time:888028ms step_avg:363.65ms step:2453/6000 train_loss:3.5743 train_time:888282ms step_avg:363.60ms step:2454/6000 train_loss:3.4729 
train_time:888537ms step_avg:363.56ms step:2455/6000 train_loss:3.6099 train_time:888791ms step_avg:363.51ms step:2456/6000 train_loss:3.5265 train_time:889046ms step_avg:363.47ms step:2457/6000 train_loss:3.4604 train_time:889442ms step_avg:363.48ms step:2458/6000 train_loss:3.3906 train_time:889696ms step_avg:363.44ms step:2459/6000 train_loss:3.5119 train_time:889951ms step_avg:363.39ms step:2460/6000 train_loss:4.1163 train_time:890206ms step_avg:363.35ms step:2461/6000 train_loss:3.5833 train_time:891531ms step_avg:363.74ms step:2462/6000 train_loss:3.3921 train_time:891784ms step_avg:363.70ms step:2463/6000 train_loss:3.5885 train_time:892038ms step_avg:363.65ms step:2464/6000 train_loss:3.5051 train_time:892292ms step_avg:363.61ms step:2465/6000 train_loss:3.7015 train_time:892548ms step_avg:363.56ms step:2466/6000 train_loss:3.9177 train_time:892802ms step_avg:363.52ms step:2467/6000 train_loss:3.6181 train_time:893057ms step_avg:363.47ms step:2468/6000 train_loss:3.4896 train_time:893312ms step_avg:363.43ms step:2469/6000 train_loss:3.6051 train_time:893567ms step_avg:363.39ms step:2470/6000 train_loss:3.6254 train_time:893974ms step_avg:363.40ms step:2471/6000 train_loss:3.4255 train_time:895302ms step_avg:363.80ms step:2472/6000 train_loss:3.5137 train_time:895554ms step_avg:363.75ms step:2473/6000 train_loss:3.5078 train_time:895809ms step_avg:363.71ms step:2474/6000 train_loss:3.6563 train_time:896062ms step_avg:363.66ms step:2475/6000 train_loss:3.7894 train_time:896318ms step_avg:363.62ms step:2476/6000 train_loss:3.3813 train_time:896574ms step_avg:363.57ms step:2477/6000 train_loss:3.5831 train_time:896831ms step_avg:363.53ms step:2478/6000 train_loss:3.5487 train_time:897085ms step_avg:363.49ms step:2479/6000 train_loss:3.3855 train_time:897340ms step_avg:363.44ms step:2480/6000 train_loss:3.3810 train_time:897597ms step_avg:363.40ms step:2481/6000 train_loss:3.5331 train_time:898923ms step_avg:363.79ms step:2482/6000 train_loss:3.5474 
train_time:899174ms step_avg:363.74ms step:2483/6000 train_loss:3.5523 train_time:899428ms step_avg:363.70ms step:2484/6000 train_loss:3.5026 train_time:899681ms step_avg:363.65ms step:2485/6000 train_loss:3.5266 train_time:899935ms step_avg:363.61ms step:2486/6000 train_loss:3.4057 train_time:900189ms step_avg:363.57ms step:2487/6000 train_loss:3.6002 train_time:900445ms step_avg:363.52ms step:2488/6000 train_loss:3.5701 train_time:900699ms step_avg:363.48ms step:2489/6000 train_loss:3.4665 train_time:900953ms step_avg:363.43ms step:2490/6000 train_loss:3.5724 train_time:901209ms step_avg:363.39ms step:2491/6000 train_loss:3.6315 train_time:902534ms step_avg:363.78ms step:2492/6000 train_loss:3.7132 train_time:902785ms step_avg:363.73ms step:2493/6000 train_loss:3.5569 train_time:903039ms step_avg:363.69ms step:2494/6000 train_loss:3.4786 train_time:903293ms step_avg:363.64ms step:2495/6000 train_loss:3.6098 train_time:903549ms step_avg:363.60ms step:2496/6000 train_loss:3.5541 train_time:903804ms step_avg:363.56ms step:2497/6000 train_loss:3.4661 train_time:904058ms step_avg:363.51ms step:2498/6000 train_loss:3.5676 train_time:904313ms step_avg:363.47ms step:2499/6000 train_loss:3.6153 train_time:904568ms step_avg:363.43ms step:2500/6000 train_loss:3.6397 train_time:904822ms step_avg:363.38ms step:2500/6000 val_loss:3.5129 train_time:904856ms step_avg:363.40ms step:2501/6000 train_loss:3.5811 train_time:906152ms step_avg:363.77ms step:2502/6000 train_loss:3.5317 train_time:906403ms step_avg:363.73ms step:2503/6000 train_loss:3.5502 train_time:906658ms step_avg:363.68ms step:2504/6000 train_loss:3.4222 train_time:906913ms step_avg:363.64ms step:2505/6000 train_loss:3.6157 train_time:907168ms step_avg:363.59ms step:2506/6000 train_loss:3.5610 train_time:907422ms step_avg:363.55ms step:2507/6000 train_loss:3.5100 train_time:907679ms step_avg:363.51ms step:2508/6000 train_loss:3.5193 train_time:907934ms step_avg:363.46ms step:2509/6000 train_loss:3.4835 
train_time:908187ms step_avg:363.42ms step:2510/6000 train_loss:3.6501 train_time:908442ms step_avg:363.38ms step:2511/6000 train_loss:3.4797 train_time:909768ms step_avg:363.76ms step:2512/6000 train_loss:3.4661 train_time:910019ms step_avg:363.72ms step:2513/6000 train_loss:3.5335 train_time:910274ms step_avg:363.67ms step:2514/6000 train_loss:3.5727 train_time:910528ms step_avg:363.63ms step:2515/6000 train_loss:3.4735 train_time:910782ms step_avg:363.59ms step:2516/6000 train_loss:3.5627 train_time:911038ms step_avg:363.54ms step:2517/6000 train_loss:3.5603 train_time:911294ms step_avg:363.50ms step:2518/6000 train_loss:3.4331 train_time:911549ms step_avg:363.46ms step:2519/6000 train_loss:3.4638 train_time:911803ms step_avg:363.41ms step:2520/6000 train_loss:3.5871 train_time:912059ms step_avg:363.37ms step:2521/6000 train_loss:3.5759 train_time:913387ms step_avg:363.75ms step:2522/6000 train_loss:3.4569 train_time:913639ms step_avg:363.71ms step:2523/6000 train_loss:3.4392 train_time:913893ms step_avg:363.67ms step:2524/6000 train_loss:3.5425 train_time:914148ms step_avg:363.62ms step:2525/6000 train_loss:3.3799 train_time:914402ms step_avg:363.58ms step:2526/6000 train_loss:3.6017 train_time:914658ms step_avg:363.54ms step:2527/6000 train_loss:3.5099 train_time:914912ms step_avg:363.49ms step:2528/6000 train_loss:3.5110 train_time:915168ms step_avg:363.45ms step:2529/6000 train_loss:3.4960 train_time:915422ms step_avg:363.41ms step:2530/6000 train_loss:3.5193 train_time:915678ms step_avg:363.36ms step:2531/6000 train_loss:3.5558 train_time:917005ms step_avg:363.75ms step:2532/6000 train_loss:3.3773 train_time:917258ms step_avg:363.70ms step:2533/6000 train_loss:3.5375 train_time:917512ms step_avg:363.66ms step:2534/6000 train_loss:3.4325 train_time:917767ms step_avg:363.62ms step:2535/6000 train_loss:3.4682 train_time:918021ms step_avg:363.57ms step:2536/6000 train_loss:3.5227 train_time:918276ms step_avg:363.53ms step:2537/6000 train_loss:3.5342 
train_time:918532ms step_avg:363.49ms step:2538/6000 train_loss:3.3523 train_time:918785ms step_avg:363.44ms step:2539/6000 train_loss:3.6680 train_time:919041ms step_avg:363.40ms step:2540/6000 train_loss:3.3494 train_time:919295ms step_avg:363.36ms step:2541/6000 train_loss:3.5346 train_time:920622ms step_avg:363.74ms step:2542/6000 train_loss:3.3068 train_time:920874ms step_avg:363.69ms step:2543/6000 train_loss:3.7384 train_time:921129ms step_avg:363.65ms step:2544/6000 train_loss:3.5033 train_time:921383ms step_avg:363.61ms step:2545/6000 train_loss:3.6628 train_time:921638ms step_avg:363.57ms step:2546/6000 train_loss:3.4939 train_time:921893ms step_avg:363.52ms step:2547/6000 train_loss:3.4855 train_time:922148ms step_avg:363.48ms step:2548/6000 train_loss:3.4822 train_time:922402ms step_avg:363.44ms step:2549/6000 train_loss:3.6465 train_time:922657ms step_avg:363.39ms step:2550/6000 train_loss:3.5035 train_time:922916ms step_avg:363.35ms step:2551/6000 train_loss:3.5033 train_time:924244ms step_avg:363.73ms step:2552/6000 train_loss:3.5308 train_time:924495ms step_avg:363.69ms step:2553/6000 train_loss:3.5536 train_time:924750ms step_avg:363.65ms step:2554/6000 train_loss:3.4674 train_time:925005ms step_avg:363.60ms step:2555/6000 train_loss:3.5682 train_time:925259ms step_avg:363.56ms step:2556/6000 train_loss:3.6259 train_time:925514ms step_avg:363.52ms step:2557/6000 train_loss:3.6128 train_time:925769ms step_avg:363.47ms step:2558/6000 train_loss:3.4545 train_time:926024ms step_avg:363.43ms step:2559/6000 train_loss:3.4493 train_time:926278ms step_avg:363.39ms step:2560/6000 train_loss:3.4645 train_time:926534ms step_avg:363.35ms step:2561/6000 train_loss:3.5837 train_time:927860ms step_avg:363.72ms step:2562/6000 train_loss:3.6202 train_time:928112ms step_avg:363.68ms step:2563/6000 train_loss:3.5007 train_time:928367ms step_avg:363.64ms step:2564/6000 train_loss:3.5361 train_time:928622ms step_avg:363.60ms step:2565/6000 train_loss:3.4421 
train_time:928876ms step_avg:363.55ms step:2566/6000 train_loss:3.4576 train_time:929132ms step_avg:363.51ms step:2567/6000 train_loss:3.4470 train_time:929385ms step_avg:363.47ms step:2568/6000 train_loss:3.4992 train_time:929640ms step_avg:363.42ms step:2569/6000 train_loss:3.6391 train_time:929894ms step_avg:363.38ms step:2570/6000 train_loss:3.5484 train_time:930150ms step_avg:363.34ms step:2571/6000 train_loss:3.6377 train_time:931476ms step_avg:363.72ms step:2572/6000 train_loss:3.3881 train_time:931729ms step_avg:363.67ms step:2573/6000 train_loss:3.4948 train_time:931984ms step_avg:363.63ms step:2574/6000 train_loss:3.1607 train_time:932240ms step_avg:363.59ms step:2575/6000 train_loss:3.4066 train_time:932494ms step_avg:363.55ms step:2576/6000 train_loss:3.3402 train_time:932750ms step_avg:363.50ms step:2577/6000 train_loss:3.4591 train_time:933004ms step_avg:363.46ms step:2578/6000 train_loss:3.5146 train_time:933259ms step_avg:363.42ms step:2579/6000 train_loss:3.4194 train_time:933514ms step_avg:363.38ms step:2580/6000 train_loss:3.4746 train_time:933769ms step_avg:363.33ms step:2581/6000 train_loss:3.4270 train_time:935096ms step_avg:363.71ms step:2582/6000 train_loss:3.5242 train_time:935346ms step_avg:363.66ms step:2583/6000 train_loss:3.4000 train_time:935601ms step_avg:363.62ms step:2584/6000 train_loss:3.5977 train_time:935856ms step_avg:363.58ms step:2585/6000 train_loss:3.5178 train_time:936110ms step_avg:363.54ms step:2586/6000 train_loss:3.5268 train_time:936365ms step_avg:363.50ms step:2587/6000 train_loss:3.6502 train_time:936620ms step_avg:363.45ms step:2588/6000 train_loss:3.5387 train_time:936874ms step_avg:363.41ms step:2589/6000 train_loss:3.3983 train_time:937130ms step_avg:363.37ms step:2590/6000 train_loss:3.5639 train_time:937384ms step_avg:363.33ms step:2591/6000 train_loss:3.4720 train_time:938712ms step_avg:363.70ms step:2592/6000 train_loss:3.6743 train_time:938963ms step_avg:363.66ms step:2593/6000 train_loss:3.5494 
train_time:939218ms step_avg:363.61ms step:2594/6000 train_loss:3.3672 train_time:939474ms step_avg:363.57ms step:2595/6000 train_loss:3.4379 train_time:939729ms step_avg:363.53ms step:2596/6000 train_loss:3.9104 train_time:939983ms step_avg:363.49ms step:2597/6000 train_loss:3.5192 train_time:940238ms step_avg:363.45ms step:2598/6000 train_loss:3.5287 train_time:940493ms step_avg:363.41ms step:2599/6000 train_loss:3.3753 train_time:940748ms step_avg:363.36ms step:2600/6000 train_loss:3.6335 train_time:941003ms step_avg:363.32ms step:2601/6000 train_loss:3.7879 train_time:942328ms step_avg:363.69ms step:2602/6000 train_loss:3.3678 train_time:942580ms step_avg:363.65ms step:2603/6000 train_loss:3.5062 train_time:942834ms step_avg:363.61ms step:2604/6000 train_loss:3.3322 train_time:943089ms step_avg:363.57ms step:2605/6000 train_loss:3.6252 train_time:943343ms step_avg:363.52ms step:2606/6000 train_loss:3.5093 train_time:943597ms step_avg:363.48ms step:2607/6000 train_loss:3.4053 train_time:943854ms step_avg:363.44ms step:2608/6000 train_loss:3.3460 train_time:944107ms step_avg:363.40ms step:2609/6000 train_loss:3.4756 train_time:944361ms step_avg:363.36ms step:2610/6000 train_loss:3.6543 train_time:944616ms step_avg:363.31ms step:2611/6000 train_loss:3.5332 train_time:945942ms step_avg:363.68ms step:2612/6000 train_loss:3.3473 train_time:946193ms step_avg:363.64ms step:2613/6000 train_loss:3.4532 train_time:946448ms step_avg:363.60ms step:2614/6000 train_loss:3.5595 train_time:946702ms step_avg:363.56ms step:2615/6000 train_loss:3.4976 train_time:946957ms step_avg:363.52ms step:2616/6000 train_loss:3.4887 train_time:947212ms step_avg:363.47ms step:2617/6000 train_loss:3.5309 train_time:947467ms step_avg:363.43ms step:2618/6000 train_loss:3.5743 train_time:947723ms step_avg:363.39ms step:2619/6000 train_loss:3.4166 train_time:947977ms step_avg:363.35ms step:2620/6000 train_loss:3.5978 train_time:948233ms step_avg:363.31ms step:2621/6000 train_loss:3.5529 
train_time:949559ms step_avg:363.68ms step:2622/6000 train_loss:3.6826 train_time:949811ms step_avg:363.63ms step:2623/6000 train_loss:3.5905 train_time:950066ms step_avg:363.59ms step:2624/6000 train_loss:3.5083 train_time:950320ms step_avg:363.55ms step:2625/6000 train_loss:3.4669 train_time:950577ms step_avg:363.51ms step:2625/6000 val_loss:3.5019 train_time:950611ms step_avg:363.52ms step:2626/6000 train_loss:3.4952 train_time:950833ms step_avg:363.47ms step:2627/6000 train_loss:3.5500 train_time:951090ms step_avg:363.43ms step:2628/6000 train_loss:3.3879 train_time:951343ms step_avg:363.39ms step:2629/6000 train_loss:3.6376 train_time:951602ms step_avg:363.35ms step:2630/6000 train_loss:3.5279 train_time:951857ms step_avg:363.30ms step:2631/6000 train_loss:3.5870 train_time:953183ms step_avg:363.67ms step:2632/6000 train_loss:3.8026 train_time:953434ms step_avg:363.63ms step:2633/6000 train_loss:3.5413 train_time:953689ms step_avg:363.59ms step:2634/6000 train_loss:3.4640 train_time:953944ms step_avg:363.55ms step:2635/6000 train_loss:3.4359 train_time:954199ms step_avg:363.50ms step:2636/6000 train_loss:3.4837 train_time:954454ms step_avg:363.46ms step:2637/6000 train_loss:3.2693 train_time:954710ms step_avg:363.42ms step:2638/6000 train_loss:3.5760 train_time:954964ms step_avg:363.38ms step:2639/6000 train_loss:3.5527 train_time:955218ms step_avg:363.34ms step:2640/6000 train_loss:3.4518 train_time:955475ms step_avg:363.30ms step:2641/6000 train_loss:3.5304 train_time:956802ms step_avg:363.66ms step:2642/6000 train_loss:3.5656 train_time:957056ms step_avg:363.62ms step:2643/6000 train_loss:3.3533 train_time:957311ms step_avg:363.58ms step:2644/6000 train_loss:3.4701 train_time:957565ms step_avg:363.54ms step:2645/6000 train_loss:3.5431 train_time:957819ms step_avg:363.50ms step:2646/6000 train_loss:3.5132 train_time:958215ms step_avg:363.51ms step:2647/6000 train_loss:3.3938 train_time:958470ms step_avg:363.47ms step:2648/6000 train_loss:3.6343 
train_time:958724ms step_avg:363.43ms step:2649/6000 train_loss:3.8839 train_time:958978ms step_avg:363.39ms step:2650/6000 train_loss:3.5244 train_time:959233ms step_avg:363.35ms step:2651/6000 train_loss:3.4848 train_time:960559ms step_avg:363.71ms step:2652/6000 train_loss:3.6212 train_time:960813ms step_avg:363.67ms step:2653/6000 train_loss:3.4560 train_time:961067ms step_avg:363.63ms step:2654/6000 train_loss:3.4359 train_time:961321ms step_avg:363.59ms step:2655/6000 train_loss:3.5112 train_time:961575ms step_avg:363.54ms step:2656/6000 train_loss:3.4287 train_time:961830ms step_avg:363.50ms step:2657/6000 train_loss:3.4691 train_time:962085ms step_avg:363.46ms step:2658/6000 train_loss:3.4235 train_time:962339ms step_avg:363.42ms step:2659/6000 train_loss:3.5185 train_time:962594ms step_avg:363.38ms step:2660/6000 train_loss:3.6601 train_time:963013ms step_avg:363.40ms step:2661/6000 train_loss:3.4580 train_time:964340ms step_avg:363.76ms step:2662/6000 train_loss:3.6076 train_time:964594ms step_avg:363.72ms step:2663/6000 train_loss:3.4706 train_time:964847ms step_avg:363.68ms step:2664/6000 train_loss:3.4672 train_time:965102ms step_avg:363.64ms step:2665/6000 train_loss:3.3967 train_time:965356ms step_avg:363.60ms step:2666/6000 train_loss:3.4477 train_time:965611ms step_avg:363.56ms step:2667/6000 train_loss:3.4822 train_time:965866ms step_avg:363.52ms step:2668/6000 train_loss:3.5328 train_time:966120ms step_avg:363.48ms step:2669/6000 train_loss:3.4475 train_time:966374ms step_avg:363.44ms step:2670/6000 train_loss:3.5123 train_time:966629ms step_avg:363.39ms step:2671/6000 train_loss:3.3833 train_time:967957ms step_avg:363.76ms step:2672/6000 train_loss:3.4641 train_time:968208ms step_avg:363.71ms step:2673/6000 train_loss:3.4416 train_time:968463ms step_avg:363.67ms step:2674/6000 train_loss:3.5018 train_time:968716ms step_avg:363.63ms step:2675/6000 train_loss:3.5278 train_time:968972ms step_avg:363.59ms step:2676/6000 train_loss:3.4989 
train_time:969226ms step_avg:363.55ms step:2677/6000 train_loss:3.4850 train_time:969482ms step_avg:363.51ms step:2678/6000 train_loss:3.5268 train_time:969737ms step_avg:363.47ms step:2679/6000 train_loss:3.5625 train_time:969992ms step_avg:363.43ms step:2680/6000 train_loss:3.4744 train_time:970247ms step_avg:363.39ms step:2681/6000 train_loss:3.4086 train_time:971573ms step_avg:363.75ms step:2682/6000 train_loss:3.4451 train_time:971824ms step_avg:363.71ms step:2683/6000 train_loss:3.9181 train_time:972079ms step_avg:363.67ms step:2684/6000 train_loss:3.4977 train_time:972333ms step_avg:363.62ms step:2685/6000 train_loss:3.5257 train_time:972588ms step_avg:363.58ms step:2686/6000 train_loss:3.5780 train_time:972842ms step_avg:363.54ms step:2687/6000 train_loss:3.4923 train_time:973097ms step_avg:363.50ms step:2688/6000 train_loss:3.5749 train_time:973353ms step_avg:363.46ms step:2689/6000 train_loss:3.5082 train_time:973607ms step_avg:363.42ms step:2690/6000 train_loss:3.4997 train_time:973862ms step_avg:363.38ms step:2691/6000 train_loss:3.5257 train_time:975189ms step_avg:363.74ms step:2692/6000 train_loss:3.5977 train_time:975439ms step_avg:363.70ms step:2693/6000 train_loss:3.3989 train_time:975695ms step_avg:363.66ms step:2694/6000 train_loss:3.7667 train_time:975949ms step_avg:363.62ms step:2695/6000 train_loss:3.5744 train_time:976205ms step_avg:363.58ms step:2696/6000 train_loss:3.4106 train_time:976459ms step_avg:363.54ms step:2697/6000 train_loss:3.5584 train_time:976715ms step_avg:363.50ms step:2698/6000 train_loss:3.5277 train_time:976970ms step_avg:363.46ms step:2699/6000 train_loss:3.4756 train_time:977224ms step_avg:363.42ms step:2700/6000 train_loss:3.5782 train_time:977480ms step_avg:363.38ms step:2701/6000 train_loss:3.5415 train_time:978806ms step_avg:363.73ms step:2702/6000 train_loss:3.4526 train_time:979057ms step_avg:363.69ms step:2703/6000 train_loss:3.4842 train_time:979313ms step_avg:363.65ms step:2704/6000 train_loss:3.4901 
train_time:979567ms step_avg:363.61ms step:2705/6000 train_loss:3.4465 train_time:979821ms step_avg:363.57ms step:2706/6000 train_loss:3.6412 train_time:980077ms step_avg:363.53ms step:2707/6000 train_loss:3.5860 train_time:980332ms step_avg:363.49ms step:2708/6000 train_loss:3.5035 train_time:980589ms step_avg:363.45ms step:2709/6000 train_loss:3.4885 train_time:980843ms step_avg:363.41ms step:2710/6000 train_loss:3.5902 train_time:981098ms step_avg:363.37ms step:2711/6000 train_loss:3.4700 train_time:982425ms step_avg:363.73ms step:2712/6000 train_loss:3.5858 train_time:982677ms step_avg:363.69ms step:2713/6000 train_loss:3.3179 train_time:982931ms step_avg:363.64ms step:2714/6000 train_loss:3.5180 train_time:983186ms step_avg:363.60ms step:2715/6000 train_loss:3.4131 train_time:983440ms step_avg:363.56ms step:2716/6000 train_loss:3.4165 train_time:983696ms step_avg:363.52ms step:2717/6000 train_loss:3.6077 train_time:983950ms step_avg:363.48ms step:2718/6000 train_loss:3.5026 train_time:984204ms step_avg:363.44ms step:2719/6000 train_loss:3.7379 train_time:984458ms step_avg:363.40ms step:2720/6000 train_loss:3.4684 train_time:984715ms step_avg:363.36ms step:2721/6000 train_loss:3.4787 train_time:986042ms step_avg:363.72ms step:2722/6000 train_loss:3.7085 train_time:986295ms step_avg:363.68ms step:2723/6000 train_loss:3.4732 train_time:986548ms step_avg:363.64ms step:2724/6000 train_loss:3.6463 train_time:986803ms step_avg:363.60ms step:2725/6000 train_loss:3.5237 train_time:987057ms step_avg:363.56ms step:2726/6000 train_loss:3.4842 train_time:987312ms step_avg:363.52ms step:2727/6000 train_loss:3.4854 train_time:987567ms step_avg:363.48ms step:2728/6000 train_loss:3.8290 train_time:987822ms step_avg:363.44ms step:2729/6000 train_loss:3.5584 train_time:988077ms step_avg:363.40ms step:2730/6000 train_loss:3.4335 train_time:988332ms step_avg:363.36ms step:2731/6000 train_loss:3.5334 train_time:989659ms step_avg:363.71ms step:2732/6000 train_loss:3.4393 
train_time:989909ms step_avg:363.67ms step:2733/6000 train_loss:3.3303 train_time:990163ms step_avg:363.63ms step:2734/6000 train_loss:3.4410 train_time:990417ms step_avg:363.59ms step:2735/6000 train_loss:3.5177 train_time:990672ms step_avg:363.55ms step:2736/6000 train_loss:3.4107 train_time:990926ms step_avg:363.51ms step:2737/6000 train_loss:3.8130 train_time:991181ms step_avg:363.47ms step:2738/6000 train_loss:3.5556 train_time:991436ms step_avg:363.43ms step:2739/6000 train_loss:3.7588 train_time:991691ms step_avg:363.39ms step:2740/6000 train_loss:3.5009 train_time:991945ms step_avg:363.35ms step:2741/6000 train_loss:3.4999 train_time:993272ms step_avg:363.70ms step:2742/6000 train_loss:3.4371 train_time:993524ms step_avg:363.66ms step:2743/6000 train_loss:3.5093 train_time:993779ms step_avg:363.62ms step:2744/6000 train_loss:3.5223 train_time:994033ms step_avg:363.58ms step:2745/6000 train_loss:3.6205 train_time:994288ms step_avg:363.54ms step:2746/6000 train_loss:3.3866 train_time:994542ms step_avg:363.50ms step:2747/6000 train_loss:3.4770 train_time:994797ms step_avg:363.46ms step:2748/6000 train_loss:3.5243 train_time:995052ms step_avg:363.42ms step:2749/6000 train_loss:3.6331 train_time:995308ms step_avg:363.38ms step:2750/6000 train_loss:3.4706 train_time:995563ms step_avg:363.34ms step:2750/6000 val_loss:3.4925 train_time:995597ms step_avg:363.36ms step:2751/6000 train_loss:3.5386 train_time:996893ms step_avg:363.70ms step:2752/6000 train_loss:3.5976 train_time:997144ms step_avg:363.66ms step:2753/6000 train_loss:3.5139 train_time:997398ms step_avg:363.62ms step:2754/6000 train_loss:3.4368 train_time:997652ms step_avg:363.58ms step:2755/6000 train_loss:3.4343 train_time:997907ms step_avg:363.54ms step:2756/6000 train_loss:3.5237 train_time:998161ms step_avg:363.50ms step:2757/6000 train_loss:3.4669 train_time:998417ms step_avg:363.46ms step:2758/6000 train_loss:3.3378 train_time:998671ms step_avg:363.42ms step:2759/6000 train_loss:3.7327 
train_time:998929ms step_avg:363.38ms step:2760/6000 train_loss:3.5436 train_time:999183ms step_avg:363.34ms step:2761/6000 train_loss:3.4968 train_time:1000510ms step_avg:363.69ms step:2762/6000 train_loss:3.4827 train_time:1000764ms step_avg:363.65ms step:2763/6000 train_loss:3.3968 train_time:1001019ms step_avg:363.61ms step:2764/6000 train_loss:3.5586 train_time:1001274ms step_avg:363.57ms step:2765/6000 train_loss:3.4788 train_time:1001529ms step_avg:363.53ms step:2766/6000 train_loss:3.3753 train_time:1001784ms step_avg:363.49ms step:2767/6000 train_loss:3.4645 train_time:1002038ms step_avg:363.45ms step:2768/6000 train_loss:3.5513 train_time:1002293ms step_avg:363.41ms step:2769/6000 train_loss:3.4303 train_time:1002549ms step_avg:363.37ms step:2770/6000 train_loss:3.5056 train_time:1002804ms step_avg:363.33ms step:2771/6000 train_loss:3.4800 train_time:1004131ms step_avg:363.68ms step:2772/6000 train_loss:3.9153 train_time:1004383ms step_avg:363.64ms step:2773/6000 train_loss:3.3849 train_time:1004638ms step_avg:363.60ms step:2774/6000 train_loss:3.5209 train_time:1004891ms step_avg:363.56ms step:2775/6000 train_loss:3.5777 train_time:1005148ms step_avg:363.53ms step:2776/6000 train_loss:3.5563 train_time:1005402ms step_avg:363.49ms step:2777/6000 train_loss:3.6272 train_time:1005656ms step_avg:363.45ms step:2778/6000 train_loss:3.6334 train_time:1005912ms step_avg:363.41ms step:2779/6000 train_loss:3.4965 train_time:1006167ms step_avg:363.37ms step:2780/6000 train_loss:3.3659 train_time:1006422ms step_avg:363.33ms step:2781/6000 train_loss:3.5157 train_time:1007747ms step_avg:363.68ms step:2782/6000 train_loss:3.5369 train_time:1008000ms step_avg:363.64ms step:2783/6000 train_loss:3.3953 train_time:1008254ms step_avg:363.60ms step:2784/6000 train_loss:3.5077 train_time:1008508ms step_avg:363.56ms step:2785/6000 train_loss:3.5666 train_time:1008763ms step_avg:363.52ms step:2786/6000 train_loss:3.4343 train_time:1009018ms step_avg:363.48ms step:2787/6000 
train_loss:3.5682 train_time:1009272ms step_avg:363.44ms step:2788/6000 train_loss:3.5280 train_time:1009529ms step_avg:363.40ms step:2789/6000 train_loss:3.4651 train_time:1009783ms step_avg:363.36ms step:2790/6000 train_loss:3.5395 train_time:1010038ms step_avg:363.32ms step:2791/6000 train_loss:3.4728 train_time:1011364ms step_avg:363.67ms step:2792/6000 train_loss:3.3680 train_time:1011618ms step_avg:363.63ms step:2793/6000 train_loss:3.4755 train_time:1011872ms step_avg:363.59ms step:2794/6000 train_loss:3.5119 train_time:1012127ms step_avg:363.55ms step:2795/6000 train_loss:3.4263 train_time:1012382ms step_avg:363.51ms step:2796/6000 train_loss:3.4633 train_time:1012637ms step_avg:363.47ms step:2797/6000 train_loss:3.3971 train_time:1012891ms step_avg:363.43ms step:2798/6000 train_loss:3.4940 train_time:1013146ms step_avg:363.40ms step:2799/6000 train_loss:3.4435 train_time:1013400ms step_avg:363.36ms step:2800/6000 train_loss:3.6214 train_time:1013655ms step_avg:363.32ms step:2801/6000 train_loss:3.5760 train_time:1014982ms step_avg:363.66ms step:2802/6000 train_loss:3.5342 train_time:1015233ms step_avg:363.62ms step:2803/6000 train_loss:3.4785 train_time:1015488ms step_avg:363.58ms step:2804/6000 train_loss:3.6705 train_time:1015743ms step_avg:363.54ms step:2805/6000 train_loss:3.6207 train_time:1015997ms step_avg:363.51ms step:2806/6000 train_loss:3.3597 train_time:1016251ms step_avg:363.47ms step:2807/6000 train_loss:3.7542 train_time:1016507ms step_avg:363.43ms step:2808/6000 train_loss:3.4972 train_time:1016762ms step_avg:363.39ms step:2809/6000 train_loss:3.4240 train_time:1017016ms step_avg:363.35ms step:2810/6000 train_loss:3.4599 train_time:1017270ms step_avg:363.31ms step:2811/6000 train_loss:3.6124 train_time:1018597ms step_avg:363.65ms step:2812/6000 train_loss:3.6028 train_time:1018849ms step_avg:363.61ms step:2813/6000 train_loss:3.3492 train_time:1019104ms step_avg:363.58ms step:2814/6000 train_loss:3.5711 train_time:1019358ms 
step_avg:363.54ms step:2815/6000 train_loss:3.6398 train_time:1019612ms step_avg:363.50ms step:2816/6000 train_loss:3.4535 train_time:1019868ms step_avg:363.46ms step:2817/6000 train_loss:3.3011 train_time:1020123ms step_avg:363.42ms step:2818/6000 train_loss:3.4858 train_time:1020377ms step_avg:363.38ms step:2819/6000 train_loss:3.4512 train_time:1020633ms step_avg:363.34ms step:2820/6000 train_loss:3.6430 train_time:1020887ms step_avg:363.31ms step:2821/6000 train_loss:3.6251 train_time:1022214ms step_avg:363.65ms step:2822/6000 train_loss:3.5781 train_time:1022468ms step_avg:363.61ms step:2823/6000 train_loss:3.5071 train_time:1022723ms step_avg:363.57ms step:2824/6000 train_loss:3.4584 train_time:1022976ms step_avg:363.53ms step:2825/6000 train_loss:3.3576 train_time:1023231ms step_avg:363.49ms step:2826/6000 train_loss:3.6251 train_time:1023485ms step_avg:363.45ms step:2827/6000 train_loss:3.5319 train_time:1023740ms step_avg:363.42ms step:2828/6000 train_loss:3.4023 train_time:1023995ms step_avg:363.38ms step:2829/6000 train_loss:3.5350 train_time:1024250ms step_avg:363.34ms step:2830/6000 train_loss:3.5341 train_time:1024505ms step_avg:363.30ms step:2831/6000 train_loss:3.4729 train_time:1025831ms step_avg:363.64ms step:2832/6000 train_loss:3.6184 train_time:1026084ms step_avg:363.60ms step:2833/6000 train_loss:3.5368 train_time:1026339ms step_avg:363.56ms step:2834/6000 train_loss:3.5171 train_time:1026594ms step_avg:363.52ms step:2835/6000 train_loss:3.3319 train_time:1026984ms step_avg:363.53ms step:2836/6000 train_loss:3.5520 train_time:1027241ms step_avg:363.50ms step:2837/6000 train_loss:3.4869 train_time:1027496ms step_avg:363.46ms step:2838/6000 train_loss:3.8013 train_time:1027749ms step_avg:363.42ms step:2839/6000 train_loss:3.4411 train_time:1028004ms step_avg:363.38ms step:2840/6000 train_loss:3.4429 train_time:1028258ms step_avg:363.34ms step:2841/6000 train_loss:3.5060 train_time:1029583ms step_avg:363.68ms step:2842/6000 train_loss:3.4308 
train_time:1029835ms step_avg:363.64ms step:2843/6000 train_loss:3.4253 train_time:1030089ms step_avg:363.60ms step:2844/6000 train_loss:3.6094 train_time:1030344ms step_avg:363.57ms step:2845/6000 train_loss:3.4937 train_time:1030598ms step_avg:363.53ms step:2846/6000 train_loss:3.5197 train_time:1030852ms step_avg:363.49ms step:2847/6000 train_loss:3.4728 train_time:1031108ms step_avg:363.45ms step:2848/6000 train_loss:3.7477 train_time:1031363ms step_avg:363.41ms step:2849/6000 train_loss:3.4154 train_time:1031617ms step_avg:363.37ms step:2850/6000 train_loss:3.4397 train_time:1032033ms step_avg:363.39ms step:2851/6000 train_loss:3.5412 train_time:1033360ms step_avg:363.73ms step:2852/6000 train_loss:3.5102 train_time:1033612ms step_avg:363.69ms step:2853/6000 train_loss:3.4759 train_time:1033866ms step_avg:363.65ms step:2854/6000 train_loss:3.5564 train_time:1034120ms step_avg:363.61ms step:2855/6000 train_loss:3.3820 train_time:1034375ms step_avg:363.58ms step:2856/6000 train_loss:3.3938 train_time:1034630ms step_avg:363.54ms step:2857/6000 train_loss:3.4879 train_time:1034885ms step_avg:363.50ms step:2858/6000 train_loss:3.4913 train_time:1035140ms step_avg:363.46ms step:2859/6000 train_loss:3.3620 train_time:1035395ms step_avg:363.42ms step:2860/6000 train_loss:3.4879 train_time:1035650ms step_avg:363.39ms step:2861/6000 train_loss:3.4476 train_time:1036977ms step_avg:363.72ms step:2862/6000 train_loss:3.4854 train_time:1037229ms step_avg:363.68ms step:2863/6000 train_loss:3.5286 train_time:1037483ms step_avg:363.65ms step:2864/6000 train_loss:3.7906 train_time:1037737ms step_avg:363.61ms step:2865/6000 train_loss:3.6063 train_time:1037992ms step_avg:363.57ms step:2866/6000 train_loss:3.4891 train_time:1038248ms step_avg:363.53ms step:2867/6000 train_loss:3.3924 train_time:1038503ms step_avg:363.49ms step:2868/6000 train_loss:3.5798 train_time:1038757ms step_avg:363.46ms step:2869/6000 train_loss:3.5231 train_time:1039012ms step_avg:363.42ms step:2870/6000 
train_loss:3.4945 train_time:1039267ms step_avg:363.38ms step:2871/6000 train_loss:3.6241 train_time:1040594ms step_avg:363.72ms step:2872/6000 train_loss:3.4094 train_time:1040847ms step_avg:363.68ms step:2873/6000 train_loss:3.4557 train_time:1041101ms step_avg:363.64ms step:2874/6000 train_loss:3.3264 train_time:1041355ms step_avg:363.60ms step:2875/6000 train_loss:3.4800 train_time:1041610ms step_avg:363.56ms step:2875/6000 val_loss:3.4853 train_time:1041644ms step_avg:363.58ms step:2876/6000 train_loss:3.4025 train_time:1041869ms step_avg:363.53ms step:2877/6000 train_loss:3.3852 train_time:1042124ms step_avg:363.49ms step:2878/6000 train_loss:3.4744 train_time:1042380ms step_avg:363.45ms step:2879/6000 train_loss:3.5906 train_time:1042636ms step_avg:363.41ms step:2880/6000 train_loss:3.5458 train_time:1042890ms step_avg:363.38ms step:2881/6000 train_loss:3.4786 train_time:1044217ms step_avg:363.71ms step:2882/6000 train_loss:3.4726 train_time:1044470ms step_avg:363.67ms step:2883/6000 train_loss:3.5984 train_time:1044725ms step_avg:363.64ms step:2884/6000 train_loss:3.3847 train_time:1044980ms step_avg:363.60ms step:2885/6000 train_loss:3.4080 train_time:1045235ms step_avg:363.56ms step:2886/6000 train_loss:3.4429 train_time:1045490ms step_avg:363.52ms step:2887/6000 train_loss:3.4470 train_time:1045745ms step_avg:363.48ms step:2888/6000 train_loss:3.4613 train_time:1045999ms step_avg:363.45ms step:2889/6000 train_loss:3.4740 train_time:1046255ms step_avg:363.41ms step:2890/6000 train_loss:3.6581 train_time:1046509ms step_avg:363.37ms step:2891/6000 train_loss:3.5131 train_time:1047836ms step_avg:363.71ms step:2892/6000 train_loss:3.3540 train_time:1048088ms step_avg:363.67ms step:2893/6000 train_loss:3.2830 train_time:1048342ms step_avg:363.63ms step:2894/6000 train_loss:3.4152 train_time:1048598ms step_avg:363.59ms step:2895/6000 train_loss:3.2936 train_time:1048854ms step_avg:363.55ms step:2896/6000 train_loss:3.4759 train_time:1049107ms step_avg:363.52ms 
step:2897/6000 train_loss:3.6027 train_time:1049363ms step_avg:363.48ms step:2898/6000 train_loss:3.4279 train_time:1049617ms step_avg:363.44ms step:2899/6000 train_loss:3.5274 train_time:1049874ms step_avg:363.40ms step:2900/6000 train_loss:3.4208 train_time:1050128ms step_avg:363.37ms step:2901/6000 train_loss:3.5972 train_time:1051455ms step_avg:363.70ms step:2902/6000 train_loss:3.5859 train_time:1051709ms step_avg:363.66ms step:2903/6000 train_loss:3.6335 train_time:1051963ms step_avg:363.62ms step:2904/6000 train_loss:3.3328 train_time:1052218ms step_avg:363.59ms step:2905/6000 train_loss:3.4759 train_time:1052472ms step_avg:363.55ms step:2906/6000 train_loss:3.4526 train_time:1052726ms step_avg:363.51ms step:2907/6000 train_loss:3.5508 train_time:1052982ms step_avg:363.47ms step:2908/6000 train_loss:3.4735 train_time:1053237ms step_avg:363.44ms step:2909/6000 train_loss:3.4352 train_time:1053492ms step_avg:363.40ms step:2910/6000 train_loss:3.7724 train_time:1053748ms step_avg:363.36ms step:2911/6000 train_loss:3.4853 train_time:1055074ms step_avg:363.69ms step:2912/6000 train_loss:3.3911 train_time:1055325ms step_avg:363.65ms step:2913/6000 train_loss:3.3813 train_time:1055581ms step_avg:363.62ms step:2914/6000 train_loss:3.8610 train_time:1055835ms step_avg:363.58ms step:2915/6000 train_loss:3.4574 train_time:1056090ms step_avg:363.54ms step:2916/6000 train_loss:3.4036 train_time:1056345ms step_avg:363.50ms step:2917/6000 train_loss:3.3819 train_time:1056599ms step_avg:363.47ms step:2918/6000 train_loss:3.6744 train_time:1056855ms step_avg:363.43ms step:2919/6000 train_loss:3.1791 train_time:1057109ms step_avg:363.39ms step:2920/6000 train_loss:3.3943 train_time:1057364ms step_avg:363.36ms step:2921/6000 train_loss:3.3972 train_time:1058690ms step_avg:363.69ms step:2922/6000 train_loss:3.4891 train_time:1058942ms step_avg:363.65ms step:2923/6000 train_loss:3.5424 train_time:1059195ms step_avg:363.61ms step:2924/6000 train_loss:3.5683 train_time:1059450ms 
step_avg:363.57ms step:2925/6000 train_loss:3.5742 train_time:1059704ms step_avg:363.53ms step:2926/6000 train_loss:3.4549 train_time:1059959ms step_avg:363.50ms step:2927/6000 train_loss:3.4744 train_time:1060213ms step_avg:363.46ms step:2928/6000 train_loss:3.4549 train_time:1060468ms step_avg:363.42ms step:2929/6000 train_loss:3.4563 train_time:1060723ms step_avg:363.39ms step:2930/6000 train_loss:3.4275 train_time:1060978ms step_avg:363.35ms step:2931/6000 train_loss:3.4572 train_time:1062303ms step_avg:363.68ms step:2932/6000 train_loss:3.5877 train_time:1062555ms step_avg:363.64ms step:2933/6000 train_loss:3.6199 train_time:1062809ms step_avg:363.60ms step:2934/6000 train_loss:3.5950 train_time:1063065ms step_avg:363.57ms step:2935/6000 train_loss:3.4346 train_time:1063319ms step_avg:363.53ms step:2936/6000 train_loss:3.4949 train_time:1063577ms step_avg:363.49ms step:2937/6000 train_loss:3.4190 train_time:1063831ms step_avg:363.45ms step:2938/6000 train_loss:3.4549 train_time:1064086ms step_avg:363.42ms step:2939/6000 train_loss:3.4836 train_time:1064340ms step_avg:363.38ms step:2940/6000 train_loss:3.5260 train_time:1064597ms step_avg:363.34ms step:2941/6000 train_loss:3.5652 train_time:1065924ms step_avg:363.67ms step:2942/6000 train_loss:3.5564 train_time:1066176ms step_avg:363.63ms step:2943/6000 train_loss:3.4840 train_time:1066430ms step_avg:363.60ms step:2944/6000 train_loss:3.3610 train_time:1066684ms step_avg:363.56ms step:2945/6000 train_loss:3.2981 train_time:1066938ms step_avg:363.52ms step:2946/6000 train_loss:3.5036 train_time:1067194ms step_avg:363.49ms step:2947/6000 train_loss:3.5702 train_time:1067449ms step_avg:363.45ms step:2948/6000 train_loss:3.5070 train_time:1067702ms step_avg:363.41ms step:2949/6000 train_loss:3.6800 train_time:1067956ms step_avg:363.37ms step:2950/6000 train_loss:3.5097 train_time:1068210ms step_avg:363.34ms step:2951/6000 train_loss:3.5102 train_time:1069538ms step_avg:363.66ms step:2952/6000 train_loss:3.9181 
train_time:1069789ms step_avg:363.63ms step:2953/6000 train_loss:3.5837 train_time:1070045ms step_avg:363.59ms step:2954/6000 train_loss:3.5296 train_time:1070299ms step_avg:363.55ms step:2955/6000 train_loss:3.5327 train_time:1070554ms step_avg:363.52ms step:2956/6000 train_loss:3.4731 train_time:1070808ms step_avg:363.48ms step:2957/6000 train_loss:3.5019 train_time:1071062ms step_avg:363.44ms step:2958/6000 train_loss:3.3648 train_time:1071317ms step_avg:363.40ms step:2959/6000 train_loss:3.4511 train_time:1071571ms step_avg:363.37ms step:2960/6000 train_loss:3.5948 train_time:1071826ms step_avg:363.33ms step:2961/6000 train_loss:3.4077 train_time:1073153ms step_avg:363.66ms step:2962/6000 train_loss:3.5307 train_time:1073404ms step_avg:363.62ms step:2963/6000 train_loss:3.3905 train_time:1073660ms step_avg:363.58ms step:2964/6000 train_loss:3.4424 train_time:1073916ms step_avg:363.55ms step:2965/6000 train_loss:3.4346 train_time:1074171ms step_avg:363.51ms step:2966/6000 train_loss:3.5582 train_time:1074425ms step_avg:363.47ms step:2967/6000 train_loss:3.4202 train_time:1074679ms step_avg:363.44ms step:2968/6000 train_loss:3.6667 train_time:1074934ms step_avg:363.40ms step:2969/6000 train_loss:3.5168 train_time:1075189ms step_avg:363.36ms step:2970/6000 train_loss:3.5324 train_time:1075443ms step_avg:363.33ms step:2971/6000 train_loss:3.5206 train_time:1076772ms step_avg:363.65ms step:2972/6000 train_loss:3.5944 train_time:1077024ms step_avg:363.61ms step:2973/6000 train_loss:3.4189 train_time:1077278ms step_avg:363.58ms step:2974/6000 train_loss:3.4334 train_time:1077532ms step_avg:363.54ms step:2975/6000 train_loss:3.3387 train_time:1077787ms step_avg:363.50ms step:2976/6000 train_loss:3.4206 train_time:1078041ms step_avg:363.47ms step:2977/6000 train_loss:3.4166 train_time:1078297ms step_avg:363.43ms step:2978/6000 train_loss:3.4300 train_time:1078552ms step_avg:363.39ms step:2979/6000 train_loss:3.7119 train_time:1078806ms step_avg:363.36ms step:2980/6000 
train_loss:3.5183 train_time:1079060ms step_avg:363.32ms step:2981/6000 train_loss:3.5706 train_time:1080388ms step_avg:363.64ms step:2982/6000 train_loss:3.5778 train_time:1080640ms step_avg:363.61ms step:2983/6000 train_loss:3.6542 train_time:1080895ms step_avg:363.57ms step:2984/6000 train_loss:3.4576 train_time:1081149ms step_avg:363.53ms step:2985/6000 train_loss:3.5496 train_time:1081404ms step_avg:363.50ms step:2986/6000 train_loss:3.5543 train_time:1081659ms step_avg:363.46ms step:2987/6000 train_loss:3.5038 train_time:1081913ms step_avg:363.42ms step:2988/6000 train_loss:3.6312 train_time:1082168ms step_avg:363.39ms step:2989/6000 train_loss:3.2295 train_time:1082422ms step_avg:363.35ms step:2990/6000 train_loss:3.5718 train_time:1082678ms step_avg:363.31ms step:2991/6000 train_loss:3.5221 train_time:1084002ms step_avg:363.64ms step:2992/6000 train_loss:3.5016 train_time:1084256ms step_avg:363.60ms step:2993/6000 train_loss:3.4146 train_time:1084510ms step_avg:363.56ms step:2994/6000 train_loss:3.5536 train_time:1084765ms step_avg:363.53ms step:2995/6000 train_loss:3.3743 train_time:1085019ms step_avg:363.49ms step:2996/6000 train_loss:3.4111 train_time:1085275ms step_avg:363.45ms step:2997/6000 train_loss:3.4706 train_time:1085529ms step_avg:363.42ms step:2998/6000 train_loss:3.4145 train_time:1085784ms step_avg:363.38ms step:2999/6000 train_loss:3.5382 train_time:1086039ms step_avg:363.35ms step:3000/6000 train_loss:3.4424 train_time:1086292ms step_avg:363.31ms step:3000/6000 val_loss:3.4814 train_time:1086327ms step_avg:363.32ms step:3001/6000 train_loss:3.4339 train_time:1087620ms step_avg:363.63ms step:3002/6000 train_loss:3.3719 train_time:1087874ms step_avg:363.59ms step:3003/6000 train_loss:3.4237 train_time:1088129ms step_avg:363.56ms step:3004/6000 train_loss:3.5477 train_time:1088384ms step_avg:363.52ms step:3005/6000 train_loss:3.8924 train_time:1088638ms step_avg:363.49ms step:3006/6000 train_loss:3.4572 train_time:1088894ms step_avg:363.45ms 
step:3007/6000 train_loss:3.5287 train_time:1089148ms step_avg:363.41ms step:3008/6000 train_loss:3.3419 train_time:1089403ms step_avg:363.38ms step:3009/6000 train_loss:3.5530 train_time:1089658ms step_avg:363.34ms step:3010/6000 train_loss:3.4496 train_time:1089914ms step_avg:363.30ms step:3011/6000 train_loss:3.5121 train_time:1091239ms step_avg:363.62ms step:3012/6000 train_loss:3.5127 train_time:1091491ms step_avg:363.59ms step:3013/6000 train_loss:3.4080 train_time:1091745ms step_avg:363.55ms step:3014/6000 train_loss:3.5970 train_time:1092000ms step_avg:363.52ms step:3015/6000 train_loss:3.5682 train_time:1092254ms step_avg:363.48ms step:3016/6000 train_loss:3.4282 train_time:1092509ms step_avg:363.44ms step:3017/6000 train_loss:3.4602 train_time:1092763ms step_avg:363.41ms step:3018/6000 train_loss:3.5088 train_time:1093019ms step_avg:363.37ms step:3019/6000 train_loss:3.5440 train_time:1093274ms step_avg:363.33ms step:3020/6000 train_loss:3.3361 train_time:1093528ms step_avg:363.30ms step:3021/6000 train_loss:3.6290 train_time:1094854ms step_avg:363.62ms step:3022/6000 train_loss:3.4632 train_time:1095107ms step_avg:363.58ms step:3023/6000 train_loss:3.3848 train_time:1095362ms step_avg:363.55ms step:3024/6000 train_loss:3.4741 train_time:1095754ms step_avg:363.55ms step:3025/6000 train_loss:3.4609 train_time:1096009ms step_avg:363.52ms step:3026/6000 train_loss:3.5061 train_time:1096264ms step_avg:363.48ms step:3027/6000 train_loss:3.5322 train_time:1096518ms step_avg:363.45ms step:3028/6000 train_loss:3.4467 train_time:1096773ms step_avg:363.41ms step:3029/6000 train_loss:3.2447 train_time:1097027ms step_avg:363.37ms step:3030/6000 train_loss:3.5886 train_time:1097283ms step_avg:363.34ms step:3031/6000 train_loss:3.3484 train_time:1098609ms step_avg:363.66ms step:3032/6000 train_loss:3.3445 train_time:1098860ms step_avg:363.62ms step:3033/6000 train_loss:3.6838 train_time:1099115ms step_avg:363.58ms step:3034/6000 train_loss:3.6685 train_time:1099370ms 
step_avg:363.55ms step:3035/6000 train_loss:3.4389 train_time:1099624ms step_avg:363.51ms step:3036/6000 train_loss:3.5198 train_time:1099878ms step_avg:363.48ms step:3037/6000 train_loss:3.4736 train_time:1100134ms step_avg:363.44ms step:3038/6000 train_loss:3.3664 train_time:1100388ms step_avg:363.40ms step:3039/6000 train_loss:3.4318 train_time:1100643ms step_avg:363.37ms step:3040/6000 train_loss:3.5293 train_time:1101049ms step_avg:363.38ms step:3041/6000 train_loss:3.5187 train_time:1102375ms step_avg:363.70ms step:3042/6000 train_loss:3.3219 train_time:1102626ms step_avg:363.66ms step:3043/6000 train_loss:3.4648 train_time:1102881ms step_avg:363.63ms step:3044/6000 train_loss:3.4948 train_time:1103136ms step_avg:363.59ms step:3045/6000 train_loss:3.5071 train_time:1103390ms step_avg:363.56ms step:3046/6000 train_loss:3.5875 train_time:1103644ms step_avg:363.52ms step:3047/6000 train_loss:3.3997 train_time:1103899ms step_avg:363.48ms step:3048/6000 train_loss:3.5211 train_time:1104153ms step_avg:363.45ms step:3049/6000 train_loss:3.4708 train_time:1104410ms step_avg:363.41ms step:3050/6000 train_loss:3.4014 train_time:1104666ms step_avg:363.38ms step:3051/6000 train_loss:3.5326 train_time:1105993ms step_avg:363.69ms step:3052/6000 train_loss:3.3709 train_time:1106244ms step_avg:363.66ms step:3053/6000 train_loss:3.6106 train_time:1106499ms step_avg:363.62ms step:3054/6000 train_loss:3.5580 train_time:1106753ms step_avg:363.59ms step:3055/6000 train_loss:3.5369 train_time:1107008ms step_avg:363.55ms step:3056/6000 train_loss:3.5327 train_time:1107263ms step_avg:363.51ms step:3057/6000 train_loss:3.4246 train_time:1107517ms step_avg:363.48ms step:3058/6000 train_loss:3.4491 train_time:1107773ms step_avg:363.44ms step:3059/6000 train_loss:3.5319 train_time:1108028ms step_avg:363.41ms step:3060/6000 train_loss:3.4280 train_time:1108284ms step_avg:363.37ms step:3061/6000 train_loss:3.4878 train_time:1109610ms step_avg:363.69ms step:3062/6000 train_loss:3.4881 
train_time:1109861ms step_avg:363.65ms step:3063/6000 train_loss:3.4258 train_time:1110116ms step_avg:363.61ms step:3064/6000 train_loss:3.3974 train_time:1110370ms step_avg:363.58ms step:3065/6000 train_loss:3.4159 train_time:1110624ms step_avg:363.54ms step:3066/6000 train_loss:3.3988 train_time:1110878ms step_avg:363.51ms step:3067/6000 train_loss:3.3885 train_time:1111135ms step_avg:363.47ms step:3068/6000 train_loss:3.3428 train_time:1111390ms step_avg:363.44ms step:3069/6000 train_loss:3.3938 train_time:1111644ms step_avg:363.40ms step:3070/6000 train_loss:3.3809 train_time:1111899ms step_avg:363.37ms step:3071/6000 train_loss:3.5622 train_time:1113226ms step_avg:363.68ms step:3072/6000 train_loss:3.4989 train_time:1113478ms step_avg:363.64ms step:3073/6000 train_loss:3.5332 train_time:1113734ms step_avg:363.61ms step:3074/6000 train_loss:3.5258 train_time:1113988ms step_avg:363.57ms step:3075/6000 train_loss:3.4698 train_time:1114243ms step_avg:363.54ms step:3076/6000 train_loss:3.5270 train_time:1114498ms step_avg:363.50ms step:3077/6000 train_loss:3.5842 train_time:1114752ms step_avg:363.47ms step:3078/6000 train_loss:3.3838 train_time:1115007ms step_avg:363.43ms step:3079/6000 train_loss:3.9351 train_time:1115261ms step_avg:363.40ms step:3080/6000 train_loss:3.4760 train_time:1115516ms step_avg:363.36ms step:3081/6000 train_loss:3.4381 train_time:1116843ms step_avg:363.67ms step:3082/6000 train_loss:3.5830 train_time:1117095ms step_avg:363.64ms step:3083/6000 train_loss:3.3980 train_time:1117348ms step_avg:363.60ms step:3084/6000 train_loss:3.4234 train_time:1117603ms step_avg:363.57ms step:3085/6000 train_loss:3.4649 train_time:1117857ms step_avg:363.53ms step:3086/6000 train_loss:3.5639 train_time:1118112ms step_avg:363.50ms step:3087/6000 train_loss:3.4778 train_time:1118366ms step_avg:363.46ms step:3088/6000 train_loss:3.3874 train_time:1118622ms step_avg:363.42ms step:3089/6000 train_loss:3.5395 train_time:1118876ms step_avg:363.39ms step:3090/6000 
train_loss:3.4092 train_time:1119131ms step_avg:363.35ms step:3091/6000 train_loss:3.6646 train_time:1120458ms step_avg:363.67ms step:3092/6000 train_loss:4.2267 train_time:1120711ms step_avg:363.63ms step:3093/6000 train_loss:3.4987 train_time:1120965ms step_avg:363.60ms step:3094/6000 train_loss:3.3882 train_time:1121221ms step_avg:363.56ms step:3095/6000 train_loss:3.3490 train_time:1121475ms step_avg:363.53ms step:3096/6000 train_loss:3.5236 train_time:1121731ms step_avg:363.49ms step:3097/6000 train_loss:3.6359 train_time:1121984ms step_avg:363.45ms step:3098/6000 train_loss:3.4206 train_time:1122239ms step_avg:363.42ms step:3099/6000 train_loss:3.4577 train_time:1122494ms step_avg:363.38ms step:3100/6000 train_loss:3.6331 train_time:1122750ms step_avg:363.35ms step:3101/6000 train_loss:3.5368 train_time:1124076ms step_avg:363.66ms step:3102/6000 train_loss:3.5288 train_time:1124328ms step_avg:363.62ms step:3103/6000 train_loss:3.4322 train_time:1124583ms step_avg:363.59ms step:3104/6000 train_loss:3.6950 train_time:1124837ms step_avg:363.55ms step:3105/6000 train_loss:3.5055 train_time:1125092ms step_avg:363.52ms step:3106/6000 train_loss:3.3642 train_time:1125347ms step_avg:363.48ms step:3107/6000 train_loss:3.4011 train_time:1125602ms step_avg:363.45ms step:3108/6000 train_loss:3.3473 train_time:1125857ms step_avg:363.41ms step:3109/6000 train_loss:3.5742 train_time:1126112ms step_avg:363.38ms step:3110/6000 train_loss:3.4590 train_time:1126366ms step_avg:363.34ms step:3111/6000 train_loss:3.4998 train_time:1127694ms step_avg:363.66ms step:3112/6000 train_loss:3.4831 train_time:1127944ms step_avg:363.62ms step:3113/6000 train_loss:3.5336 train_time:1128199ms step_avg:363.58ms step:3114/6000 train_loss:3.4784 train_time:1128453ms step_avg:363.55ms step:3115/6000 train_loss:3.4940 train_time:1128708ms step_avg:363.51ms step:3116/6000 train_loss:3.5193 train_time:1128962ms step_avg:363.48ms step:3117/6000 train_loss:3.3805 train_time:1129217ms 
step_avg:363.44ms step:3118/6000 train_loss:3.3934 train_time:1129471ms step_avg:363.41ms step:3119/6000 train_loss:3.5859 train_time:1129726ms step_avg:363.37ms step:3120/6000 train_loss:3.5636 train_time:1129981ms step_avg:363.34ms step:3121/6000 train_loss:3.3472 train_time:1131307ms step_avg:363.65ms step:3122/6000 train_loss:3.5436 train_time:1131559ms step_avg:363.61ms step:3123/6000 train_loss:3.5969 train_time:1131815ms step_avg:363.58ms step:3124/6000 train_loss:3.5679 train_time:1132070ms step_avg:363.54ms step:3125/6000 train_loss:3.3541 train_time:1132324ms step_avg:363.51ms step:3125/6000 val_loss:3.4695 train_time:1132359ms step_avg:363.52ms step:3126/6000 train_loss:3.4405 train_time:1132582ms step_avg:363.47ms step:3127/6000 train_loss:3.4855 train_time:1132838ms step_avg:363.44ms step:3128/6000 train_loss:3.5649 train_time:1133092ms step_avg:363.40ms step:3129/6000 train_loss:3.6361 train_time:1133347ms step_avg:363.37ms step:3130/6000 train_loss:3.3431 train_time:1133602ms step_avg:363.33ms step:3131/6000 train_loss:3.5059 train_time:1134932ms step_avg:363.64ms step:3132/6000 train_loss:3.5079 train_time:1135184ms step_avg:363.61ms step:3133/6000 train_loss:3.5236 train_time:1135439ms step_avg:363.57ms step:3134/6000 train_loss:3.4264 train_time:1135694ms step_avg:363.54ms step:3135/6000 train_loss:3.5451 train_time:1135949ms step_avg:363.50ms step:3136/6000 train_loss:3.4582 train_time:1136203ms step_avg:363.47ms step:3137/6000 train_loss:3.5240 train_time:1136459ms step_avg:363.43ms step:3138/6000 train_loss:3.7197 train_time:1136714ms step_avg:363.40ms step:3139/6000 train_loss:3.6770 train_time:1136969ms step_avg:363.36ms step:3140/6000 train_loss:3.4433 train_time:1137224ms step_avg:363.33ms step:3141/6000 train_loss:3.4715 train_time:1138552ms step_avg:363.64ms step:3142/6000 train_loss:3.3877 train_time:1138803ms step_avg:363.60ms step:3143/6000 train_loss:3.4819 train_time:1139059ms step_avg:363.57ms step:3144/6000 train_loss:3.2816 
train_time:1139313ms step_avg:363.53ms step:3145/6000 train_loss:3.5177 train_time:1139567ms step_avg:363.50ms step:3146/6000 train_loss:3.4361 train_time:1139821ms step_avg:363.46ms step:3147/6000 train_loss:3.4503 train_time:1140076ms step_avg:363.43ms step:3148/6000 train_loss:3.6273 train_time:1140330ms step_avg:363.39ms step:3149/6000 train_loss:3.7169 train_time:1140585ms step_avg:363.36ms step:3150/6000 train_loss:3.5822 train_time:1140841ms step_avg:363.33ms step:3151/6000 train_loss:3.3906 train_time:1142166ms step_avg:363.63ms step:3152/6000 train_loss:3.4399 train_time:1142418ms step_avg:363.60ms step:3153/6000 train_loss:3.4204 train_time:1142674ms step_avg:363.56ms step:3154/6000 train_loss:3.5379 train_time:1142928ms step_avg:363.53ms step:3155/6000 train_loss:3.3528 train_time:1143183ms step_avg:363.49ms step:3156/6000 train_loss:3.5013 train_time:1143438ms step_avg:363.46ms step:3157/6000 train_loss:3.4417 train_time:1143692ms step_avg:363.42ms step:3158/6000 train_loss:3.5592 train_time:1143948ms step_avg:363.39ms step:3159/6000 train_loss:3.6214 train_time:1144203ms step_avg:363.35ms step:3160/6000 train_loss:3.4680 train_time:1144458ms step_avg:363.32ms step:3161/6000 train_loss:3.5340 train_time:1145784ms step_avg:363.63ms step:3162/6000 train_loss:3.5963 train_time:1146036ms step_avg:363.59ms step:3163/6000 train_loss:3.5055 train_time:1146290ms step_avg:363.56ms step:3164/6000 train_loss:3.5614 train_time:1146547ms step_avg:363.52ms step:3165/6000 train_loss:3.3817 train_time:1146801ms step_avg:363.49ms step:3166/6000 train_loss:3.3666 train_time:1147055ms step_avg:363.45ms step:3167/6000 train_loss:3.4030 train_time:1147310ms step_avg:363.42ms step:3168/6000 train_loss:3.2387 train_time:1147565ms step_avg:363.38ms step:3169/6000 train_loss:3.4001 train_time:1147820ms step_avg:363.35ms step:3170/6000 train_loss:3.5500 train_time:1148076ms step_avg:363.32ms step:3171/6000 train_loss:3.5873 train_time:1149403ms step_avg:363.62ms step:3172/6000 
train_loss:3.5343 train_time:1149655ms step_avg:363.58ms step:3173/6000 train_loss:3.5056 train_time:1149909ms step_avg:363.55ms step:3174/6000 train_loss:3.4735 train_time:1150164ms step_avg:363.52ms step:3175/6000 train_loss:3.4753 train_time:1150418ms step_avg:363.48ms step:3176/6000 train_loss:3.4828 train_time:1150674ms step_avg:363.45ms step:3177/6000 train_loss:3.4068 train_time:1150928ms step_avg:363.41ms step:3178/6000 train_loss:3.5336 train_time:1151184ms step_avg:363.38ms step:3179/6000 train_loss:3.6054 train_time:1151440ms step_avg:363.34ms step:3180/6000 train_loss:3.4555 train_time:1151694ms step_avg:363.31ms step:3181/6000 train_loss:3.4378 train_time:1153020ms step_avg:363.61ms step:3182/6000 train_loss:3.4908 train_time:1153274ms step_avg:363.58ms step:3183/6000 train_loss:3.5848 train_time:1153529ms step_avg:363.55ms step:3184/6000 train_loss:3.6029 train_time:1153783ms step_avg:363.51ms step:3185/6000 train_loss:3.4990 train_time:1154038ms step_avg:363.48ms step:3186/6000 train_loss:3.5678 train_time:1154292ms step_avg:363.44ms step:3187/6000 train_loss:3.5584 train_time:1154547ms step_avg:363.41ms step:3188/6000 train_loss:3.3499 train_time:1154801ms step_avg:363.37ms step:3189/6000 train_loss:3.5402 train_time:1155057ms step_avg:363.34ms step:3190/6000 train_loss:3.4633 train_time:1155311ms step_avg:363.31ms step:3191/6000 train_loss:3.4927 train_time:1156639ms step_avg:363.61ms step:3192/6000 train_loss:3.4481 train_time:1156892ms step_avg:363.57ms step:3193/6000 train_loss:3.3802 train_time:1157147ms step_avg:363.54ms step:3194/6000 train_loss:4.4012 train_time:1157401ms step_avg:363.51ms step:3195/6000 train_loss:3.4941 train_time:1157656ms step_avg:363.47ms step:3196/6000 train_loss:3.2824 train_time:1157910ms step_avg:363.44ms step:3197/6000 train_loss:3.4465 train_time:1158166ms step_avg:363.40ms step:3198/6000 train_loss:3.3318 train_time:1158421ms step_avg:363.37ms step:3199/6000 train_loss:3.4309 train_time:1158676ms 
step_avg:363.34ms step:3200/6000 train_loss:3.3659 train_time:1158930ms step_avg:363.30ms step:3201/6000 train_loss:3.4511 train_time:1160257ms step_avg:363.60ms step:3202/6000 train_loss:3.5394 train_time:1160508ms step_avg:363.57ms step:3203/6000 train_loss:3.3947 train_time:1160763ms step_avg:363.53ms step:3204/6000 train_loss:3.4312 train_time:1161018ms step_avg:363.50ms step:3205/6000 train_loss:3.5204 train_time:1161272ms step_avg:363.47ms step:3206/6000 train_loss:3.6803 train_time:1161526ms step_avg:363.43ms step:3207/6000 train_loss:3.2765 train_time:1161783ms step_avg:363.40ms step:3208/6000 train_loss:3.6315 train_time:1162037ms step_avg:363.36ms step:3209/6000 train_loss:3.4827 train_time:1162291ms step_avg:363.33ms step:3210/6000 train_loss:3.5521 train_time:1162546ms step_avg:363.30ms step:3211/6000 train_loss:3.6423 train_time:1163873ms step_avg:363.60ms step:3212/6000 train_loss:3.3265 train_time:1164125ms step_avg:363.56ms step:3213/6000 train_loss:3.3691 train_time:1164520ms step_avg:363.57ms step:3214/6000 train_loss:3.5875 train_time:1164775ms step_avg:363.54ms step:3215/6000 train_loss:3.3809 train_time:1165029ms step_avg:363.50ms step:3216/6000 train_loss:3.4482 train_time:1165282ms step_avg:363.47ms step:3217/6000 train_loss:3.3422 train_time:1165536ms step_avg:363.43ms step:3218/6000 train_loss:3.4747 train_time:1165790ms step_avg:363.40ms step:3219/6000 train_loss:3.5194 train_time:1166045ms step_avg:363.37ms step:3220/6000 train_loss:3.5697 train_time:1166299ms step_avg:363.33ms step:3221/6000 train_loss:3.5152 train_time:1167626ms step_avg:363.63ms step:3222/6000 train_loss:3.5069 train_time:1167879ms step_avg:363.60ms step:3223/6000 train_loss:3.3816 train_time:1168133ms step_avg:363.56ms step:3224/6000 train_loss:3.3985 train_time:1168388ms step_avg:363.53ms step:3225/6000 train_loss:3.4080 train_time:1168642ms step_avg:363.50ms step:3226/6000 train_loss:3.4402 train_time:1168896ms step_avg:363.46ms step:3227/6000 train_loss:3.3742 
train_time:1169151ms step_avg:363.43ms step:3228/6000 train_loss:3.2937 train_time:1169405ms step_avg:363.39ms step:3229/6000 train_loss:3.4204 train_time:1169660ms step_avg:363.36ms step:3230/6000 train_loss:3.1924 train_time:1170105ms step_avg:363.39ms step:3231/6000 train_loss:3.3698 train_time:1171432ms step_avg:363.69ms step:3232/6000 train_loss:3.3693 train_time:1171682ms step_avg:363.65ms step:3233/6000 train_loss:3.6057 train_time:1171936ms step_avg:363.62ms step:3234/6000 train_loss:3.5816 train_time:1172190ms step_avg:363.58ms step:3235/6000 train_loss:3.5476 train_time:1172444ms step_avg:363.55ms step:3236/6000 train_loss:3.4452 train_time:1172699ms step_avg:363.51ms step:3237/6000 train_loss:3.6048 train_time:1172954ms step_avg:363.48ms step:3238/6000 train_loss:3.4634 train_time:1173209ms step_avg:363.45ms step:3239/6000 train_loss:3.5874 train_time:1173463ms step_avg:363.41ms step:3240/6000 train_loss:3.5579 train_time:1173718ms step_avg:363.38ms step:3241/6000 train_loss:3.4527 train_time:1175044ms step_avg:363.68ms step:3242/6000 train_loss:3.4092 train_time:1175295ms step_avg:363.64ms step:3243/6000 train_loss:3.6330 train_time:1175550ms step_avg:363.61ms step:3244/6000 train_loss:3.4994 train_time:1175804ms step_avg:363.58ms step:3245/6000 train_loss:3.5478 train_time:1176061ms step_avg:363.54ms step:3246/6000 train_loss:3.4364 train_time:1176314ms step_avg:363.51ms step:3247/6000 train_loss:3.5720 train_time:1176570ms step_avg:363.48ms step:3248/6000 train_loss:3.5015 train_time:1176824ms step_avg:363.44ms step:3249/6000 train_loss:3.4419 train_time:1177080ms step_avg:363.41ms step:3250/6000 train_loss:3.3212 train_time:1177335ms step_avg:363.37ms step:3250/6000 val_loss:3.4608 train_time:1177369ms step_avg:363.39ms step:3251/6000 train_loss:3.5197 train_time:1178665ms step_avg:363.67ms step:3252/6000 train_loss:3.5190 train_time:1178916ms step_avg:363.64ms step:3253/6000 train_loss:3.4736 train_time:1179170ms step_avg:363.60ms step:3254/6000 
train_loss:3.3916 train_time:1179426ms step_avg:363.57ms step:3255/6000 train_loss:3.5481 train_time:1179680ms step_avg:363.54ms step:3256/6000 train_loss:3.5708 train_time:1179934ms step_avg:363.50ms step:3257/6000 train_loss:3.5086 train_time:1180190ms step_avg:363.47ms step:3258/6000 train_loss:3.5430 train_time:1180445ms step_avg:363.44ms step:3259/6000 train_loss:3.3885 train_time:1180700ms step_avg:363.40ms step:3260/6000 train_loss:3.4703 train_time:1180954ms step_avg:363.37ms step:3261/6000 train_loss:3.3389 train_time:1182283ms step_avg:363.67ms step:3262/6000 train_loss:3.3727 train_time:1182534ms step_avg:363.63ms step:3263/6000 train_loss:3.4060 train_time:1182789ms step_avg:363.60ms step:3264/6000 train_loss:3.5573 train_time:1183043ms step_avg:363.57ms step:3265/6000 train_loss:3.4372 train_time:1183299ms step_avg:363.53ms step:3266/6000 train_loss:3.4965 train_time:1183553ms step_avg:363.50ms step:3267/6000 train_loss:3.5102 train_time:1183808ms step_avg:363.47ms step:3268/6000 train_loss:3.5991 train_time:1184062ms step_avg:363.43ms step:3269/6000 train_loss:3.4089 train_time:1184317ms step_avg:363.40ms step:3270/6000 train_loss:3.5370 train_time:1184572ms step_avg:363.37ms step:3271/6000 train_loss:3.4074 train_time:1185898ms step_avg:363.66ms step:3272/6000 train_loss:3.3165 train_time:1186149ms step_avg:363.63ms step:3273/6000 train_loss:3.4158 train_time:1186406ms step_avg:363.59ms step:3274/6000 train_loss:3.5576 train_time:1186660ms step_avg:363.56ms step:3275/6000 train_loss:3.3489 train_time:1186914ms step_avg:363.53ms step:3276/6000 train_loss:3.5004 train_time:1187169ms step_avg:363.49ms step:3277/6000 train_loss:3.4950 train_time:1187424ms step_avg:363.46ms step:3278/6000 train_loss:3.4873 train_time:1187679ms step_avg:363.43ms step:3279/6000 train_loss:3.4519 train_time:1187933ms step_avg:363.39ms step:3280/6000 train_loss:3.6070 train_time:1188189ms step_avg:363.36ms step:3281/6000 train_loss:3.4585 train_time:1189516ms 
step_avg:363.66ms step:3282/6000 train_loss:3.5173 train_time:1189768ms step_avg:363.62ms step:3283/6000 train_loss:3.3630 train_time:1190023ms step_avg:363.59ms step:3284/6000 train_loss:3.5021 train_time:1190280ms step_avg:363.56ms step:3285/6000 train_loss:3.5499 train_time:1190534ms step_avg:363.52ms step:3286/6000 train_loss:3.5253 train_time:1190789ms step_avg:363.49ms step:3287/6000 train_loss:3.5684 train_time:1191043ms step_avg:363.46ms step:3288/6000 train_loss:3.4415 train_time:1191297ms step_avg:363.42ms step:3289/6000 train_loss:3.5601 train_time:1191552ms step_avg:363.39ms step:3290/6000 train_loss:3.4840 train_time:1191808ms step_avg:363.36ms step:3291/6000 train_loss:3.3629 train_time:1193134ms step_avg:363.65ms step:3292/6000 train_loss:3.4895 train_time:1193388ms step_avg:363.62ms step:3293/6000 train_loss:3.5189 train_time:1193642ms step_avg:363.58ms step:3294/6000 train_loss:3.5039 train_time:1193897ms step_avg:363.55ms step:3295/6000 train_loss:3.3894 train_time:1194151ms step_avg:363.52ms step:3296/6000 train_loss:3.4428 train_time:1194407ms step_avg:363.48ms step:3297/6000 train_loss:3.4898 train_time:1194661ms step_avg:363.45ms step:3298/6000 train_loss:3.4824 train_time:1194915ms step_avg:363.42ms step:3299/6000 train_loss:3.4682 train_time:1195170ms step_avg:363.38ms step:3300/6000 train_loss:3.5244 train_time:1195426ms step_avg:363.35ms step:3301/6000 train_loss:3.4278 train_time:1196752ms step_avg:363.64ms step:3302/6000 train_loss:3.4968 train_time:1197005ms step_avg:363.61ms step:3303/6000 train_loss:3.4390 train_time:1197259ms step_avg:363.58ms step:3304/6000 train_loss:3.4575 train_time:1197514ms step_avg:363.54ms step:3305/6000 train_loss:3.4509 train_time:1197769ms step_avg:363.51ms step:3306/6000 train_loss:3.5530 train_time:1198024ms step_avg:363.48ms step:3307/6000 train_loss:3.4806 train_time:1198279ms step_avg:363.45ms step:3308/6000 train_loss:3.4449 train_time:1198534ms step_avg:363.41ms step:3309/6000 train_loss:3.5633 
train_time:1198791ms step_avg:363.38ms step:3310/6000 train_loss:3.4429 train_time:1199047ms step_avg:363.35ms step:3311/6000 train_loss:3.3859 train_time:1200373ms step_avg:363.64ms step:3312/6000 train_loss:3.4973 train_time:1200624ms step_avg:363.61ms step:3313/6000 train_loss:3.4615 train_time:1200878ms step_avg:363.57ms step:3314/6000 train_loss:3.6636 train_time:1201132ms step_avg:363.54ms step:3315/6000 train_loss:3.4945 train_time:1201388ms step_avg:363.51ms step:3316/6000 train_loss:3.4594 train_time:1201645ms step_avg:363.47ms step:3317/6000 train_loss:3.0872 train_time:1201899ms step_avg:363.44ms step:3318/6000 train_loss:3.5920 train_time:1202154ms step_avg:363.41ms step:3319/6000 train_loss:3.4414 train_time:1202410ms step_avg:363.38ms step:3320/6000 train_loss:3.5106 train_time:1202663ms step_avg:363.34ms step:3321/6000 train_loss:3.4462 train_time:1203990ms step_avg:363.63ms step:3322/6000 train_loss:3.5133 train_time:1204243ms step_avg:363.60ms step:3323/6000 train_loss:3.4440 train_time:1204498ms step_avg:363.57ms step:3324/6000 train_loss:3.3734 train_time:1204753ms step_avg:363.53ms step:3325/6000 train_loss:3.3067 train_time:1205008ms step_avg:363.50ms step:3326/6000 train_loss:3.4735 train_time:1205261ms step_avg:363.47ms step:3327/6000 train_loss:3.4241 train_time:1205518ms step_avg:363.44ms step:3328/6000 train_loss:3.3593 train_time:1205772ms step_avg:363.40ms step:3329/6000 train_loss:3.3934 train_time:1206028ms step_avg:363.37ms step:3330/6000 train_loss:3.3559 train_time:1206283ms step_avg:363.34ms step:3331/6000 train_loss:3.5965 train_time:1207608ms step_avg:363.63ms step:3332/6000 train_loss:3.4943 train_time:1207861ms step_avg:363.59ms step:3333/6000 train_loss:3.4732 train_time:1208116ms step_avg:363.56ms step:3334/6000 train_loss:3.3356 train_time:1208370ms step_avg:363.53ms step:3335/6000 train_loss:3.4109 train_time:1208625ms step_avg:363.50ms step:3336/6000 train_loss:3.5131 train_time:1208879ms step_avg:363.46ms step:3337/6000 
train_loss:3.4898 train_time:1209134ms step_avg:363.43ms step:3338/6000 train_loss:3.5168 train_time:1209388ms step_avg:363.40ms step:3339/6000 train_loss:3.4452 train_time:1209643ms step_avg:363.37ms step:3340/6000 train_loss:3.4780 train_time:1209897ms step_avg:363.33ms step:3341/6000 train_loss:3.4902 train_time:1211224ms step_avg:363.62ms step:3342/6000 train_loss:3.4942 train_time:1211476ms step_avg:363.59ms step:3343/6000 train_loss:3.4878 train_time:1211731ms step_avg:363.56ms step:3344/6000 train_loss:3.4126 train_time:1211985ms step_avg:363.52ms step:3345/6000 train_loss:3.3290 train_time:1212239ms step_avg:363.49ms step:3346/6000 train_loss:3.6546 train_time:1212494ms step_avg:363.46ms step:3347/6000 train_loss:3.4080 train_time:1212749ms step_avg:363.42ms step:3348/6000 train_loss:3.5757 train_time:1213003ms step_avg:363.39ms step:3349/6000 train_loss:3.4420 train_time:1213257ms step_avg:363.36ms step:3350/6000 train_loss:3.5226 train_time:1213512ms step_avg:363.33ms step:3351/6000 train_loss:3.2772 train_time:1214840ms step_avg:363.62ms step:3352/6000 train_loss:3.3055 train_time:1215092ms step_avg:363.58ms step:3353/6000 train_loss:3.4759 train_time:1215347ms step_avg:363.55ms step:3354/6000 train_loss:3.3352 train_time:1215601ms step_avg:363.52ms step:3355/6000 train_loss:3.4943 train_time:1215855ms step_avg:363.48ms step:3356/6000 train_loss:3.3552 train_time:1216109ms step_avg:363.45ms step:3357/6000 train_loss:3.5297 train_time:1216363ms step_avg:363.42ms step:3358/6000 train_loss:3.3849 train_time:1216617ms step_avg:363.39ms step:3359/6000 train_loss:3.5569 train_time:1216872ms step_avg:363.35ms step:3360/6000 train_loss:3.3755 train_time:1217129ms step_avg:363.32ms step:3361/6000 train_loss:4.1682 train_time:1218456ms step_avg:363.61ms step:3362/6000 train_loss:3.5258 train_time:1218709ms step_avg:363.58ms step:3363/6000 train_loss:3.5727 train_time:1218963ms step_avg:363.54ms step:3364/6000 train_loss:3.4320 train_time:1219218ms 
step_avg:363.51ms step:3365/6000 train_loss:3.5628 train_time:1219471ms step_avg:363.48ms step:3366/6000 train_loss:3.4601 train_time:1219725ms step_avg:363.45ms step:3367/6000 train_loss:3.6315 train_time:1219980ms step_avg:363.41ms step:3368/6000 train_loss:3.4392 train_time:1220234ms step_avg:363.38ms step:3369/6000 train_loss:3.4525 train_time:1220491ms step_avg:363.35ms step:3370/6000 train_loss:3.4277 train_time:1220746ms step_avg:363.32ms step:3371/6000 train_loss:3.3828 train_time:1222071ms step_avg:363.60ms step:3372/6000 train_loss:3.3907 train_time:1222326ms step_avg:363.57ms step:3373/6000 train_loss:3.4500 train_time:1222579ms step_avg:363.54ms step:3374/6000 train_loss:3.4766 train_time:1222833ms step_avg:363.51ms step:3375/6000 train_loss:3.4473 train_time:1223089ms step_avg:363.47ms step:3375/6000 val_loss:3.4558 train_time:1223123ms step_avg:363.48ms step:3376/6000 train_loss:3.4849 train_time:1223349ms step_avg:363.44ms step:3377/6000 train_loss:3.4908 train_time:1223604ms step_avg:363.41ms step:3378/6000 train_loss:3.5677 train_time:1223863ms step_avg:363.38ms step:3379/6000 train_loss:3.4217 train_time:1224118ms step_avg:363.35ms step:3380/6000 train_loss:3.4381 train_time:1224374ms step_avg:363.32ms step:3381/6000 train_loss:3.4418 train_time:1225700ms step_avg:363.60ms step:3382/6000 train_loss:3.5503 train_time:1225953ms step_avg:363.57ms step:3383/6000 train_loss:3.3828 train_time:1226208ms step_avg:363.54ms step:3384/6000 train_loss:3.5548 train_time:1226463ms step_avg:363.50ms step:3385/6000 train_loss:3.4063 train_time:1226718ms step_avg:363.47ms step:3386/6000 train_loss:3.4288 train_time:1226972ms step_avg:363.44ms step:3387/6000 train_loss:3.3758 train_time:1227227ms step_avg:363.41ms step:3388/6000 train_loss:3.5496 train_time:1227483ms step_avg:363.38ms step:3389/6000 train_loss:3.5207 train_time:1227737ms step_avg:363.34ms step:3390/6000 train_loss:3.5297 train_time:1227991ms step_avg:363.31ms step:3391/6000 train_loss:3.5110 
train_time:1229318ms step_avg:363.60ms step:3392/6000 train_loss:3.4469 train_time:1229570ms step_avg:363.56ms step:3393/6000 train_loss:3.5761 train_time:1229826ms step_avg:363.53ms step:3394/6000 train_loss:3.5320 train_time:1230081ms step_avg:363.50ms step:3395/6000 train_loss:3.6208 train_time:1230336ms step_avg:363.47ms step:3396/6000 train_loss:3.4828 train_time:1230589ms step_avg:363.43ms step:3397/6000 train_loss:3.4703 train_time:1230845ms step_avg:363.40ms step:3398/6000 train_loss:3.4167 train_time:1231099ms step_avg:363.37ms step:3399/6000 train_loss:3.4715 train_time:1231353ms step_avg:363.34ms step:3400/6000 train_loss:3.4672 train_time:1231608ms step_avg:363.31ms step:3401/6000 train_loss:3.5676 train_time:1232935ms step_avg:363.59ms step:3402/6000 train_loss:3.4329 train_time:1233334ms step_avg:363.60ms step:3403/6000 train_loss:3.6139 train_time:1233587ms step_avg:363.57ms step:3404/6000 train_loss:3.4395 train_time:1233841ms step_avg:363.54ms step:3405/6000 train_loss:3.4482 train_time:1234096ms step_avg:363.50ms step:3406/6000 train_loss:3.4013 train_time:1234350ms step_avg:363.47ms step:3407/6000 train_loss:3.4601 train_time:1234605ms step_avg:363.44ms step:3408/6000 train_loss:3.4621 train_time:1234861ms step_avg:363.41ms step:3409/6000 train_loss:3.4499 train_time:1235116ms step_avg:363.38ms step:3410/6000 train_loss:3.4648 train_time:1235371ms step_avg:363.34ms step:3411/6000 train_loss:3.4279 train_time:1236698ms step_avg:363.63ms step:3412/6000 train_loss:3.4626 train_time:1236950ms step_avg:363.59ms step:3413/6000 train_loss:3.3884 train_time:1237204ms step_avg:363.56ms step:3414/6000 train_loss:3.6010 train_time:1237460ms step_avg:363.53ms step:3415/6000 train_loss:3.3603 train_time:1237715ms step_avg:363.50ms step:3416/6000 train_loss:3.5120 train_time:1237969ms step_avg:363.47ms step:3417/6000 train_loss:3.3917 train_time:1238224ms step_avg:363.44ms step:3418/6000 train_loss:3.4950 train_time:1238479ms step_avg:363.40ms step:3419/6000 
train_loss:3.5004 train_time:1238734ms step_avg:363.37ms step:3420/6000 train_loss:3.5206 train_time:1239145ms step_avg:363.39ms step:3421/6000 train_loss:3.3970 train_time:1240470ms step_avg:363.67ms step:3422/6000 train_loss:3.4446 train_time:1240723ms step_avg:363.64ms step:3423/6000 train_loss:3.3750 train_time:1240978ms step_avg:363.60ms step:3424/6000 train_loss:3.6995 train_time:1241233ms step_avg:363.57ms step:3425/6000 train_loss:3.5850 train_time:1241488ms step_avg:363.54ms step:3426/6000 train_loss:3.4510 train_time:1241744ms step_avg:363.51ms step:3427/6000 train_loss:3.4066 train_time:1241998ms step_avg:363.48ms step:3428/6000 train_loss:3.3839 train_time:1242252ms step_avg:363.44ms step:3429/6000 train_loss:3.3937 train_time:1242508ms step_avg:363.41ms step:3430/6000 train_loss:3.4480 train_time:1242764ms step_avg:363.38ms step:3431/6000 train_loss:3.4706 train_time:1244091ms step_avg:363.66ms step:3432/6000 train_loss:3.5762 train_time:1244342ms step_avg:363.63ms step:3433/6000 train_loss:3.3849 train_time:1244597ms step_avg:363.60ms step:3434/6000 train_loss:3.6116 train_time:1244851ms step_avg:363.57ms step:3435/6000 train_loss:3.5275 train_time:1245105ms step_avg:363.53ms step:3436/6000 train_loss:3.3684 train_time:1245361ms step_avg:363.50ms step:3437/6000 train_loss:3.4086 train_time:1245615ms step_avg:363.47ms step:3438/6000 train_loss:3.4691 train_time:1245870ms step_avg:363.44ms step:3439/6000 train_loss:3.5576 train_time:1246126ms step_avg:363.41ms step:3440/6000 train_loss:3.3299 train_time:1246380ms step_avg:363.38ms step:3441/6000 train_loss:3.5116 train_time:1247707ms step_avg:363.66ms step:3442/6000 train_loss:3.4129 train_time:1247963ms step_avg:363.63ms step:3443/6000 train_loss:3.5936 train_time:1248218ms step_avg:363.59ms step:3444/6000 train_loss:3.4562 train_time:1248473ms step_avg:363.56ms step:3445/6000 train_loss:3.3419 train_time:1248728ms step_avg:363.53ms step:3446/6000 train_loss:3.5580 train_time:1248983ms 
step_avg:363.50ms step:3447/6000 train_loss:3.6355 train_time:1249238ms step_avg:363.47ms step:3448/6000 train_loss:3.4418 train_time:1249492ms step_avg:363.44ms step:3449/6000 train_loss:3.4669 train_time:1249747ms step_avg:363.40ms step:3450/6000 train_loss:3.5463 train_time:1250002ms step_avg:363.37ms step:3451/6000 train_loss:3.5493 train_time:1251329ms step_avg:363.65ms step:3452/6000 train_loss:3.5498 train_time:1251580ms step_avg:363.62ms step:3453/6000 train_loss:3.3552 train_time:1251835ms step_avg:363.59ms step:3454/6000 train_loss:3.4769 train_time:1252090ms step_avg:363.56ms step:3455/6000 train_loss:3.3044 train_time:1252345ms step_avg:363.53ms step:3456/6000 train_loss:3.6399 train_time:1252598ms step_avg:363.49ms step:3457/6000 train_loss:3.3271 train_time:1252853ms step_avg:363.46ms step:3458/6000 train_loss:3.4757 train_time:1253107ms step_avg:363.43ms step:3459/6000 train_loss:3.4099 train_time:1253361ms step_avg:363.40ms step:3460/6000 train_loss:3.4186 train_time:1253616ms step_avg:363.37ms step:3461/6000 train_loss:3.4148 train_time:1254942ms step_avg:363.65ms step:3462/6000 train_loss:3.4218 train_time:1255194ms step_avg:363.61ms step:3463/6000 train_loss:3.5216 train_time:1255449ms step_avg:363.58ms step:3464/6000 train_loss:3.3903 train_time:1255703ms step_avg:363.55ms step:3465/6000 train_loss:3.4085 train_time:1255958ms step_avg:363.52ms step:3466/6000 train_loss:3.3843 train_time:1256212ms step_avg:363.49ms step:3467/6000 train_loss:3.5406 train_time:1256466ms step_avg:363.46ms step:3468/6000 train_loss:3.4276 train_time:1256722ms step_avg:363.42ms step:3469/6000 train_loss:3.4447 train_time:1256975ms step_avg:363.39ms step:3470/6000 train_loss:3.6305 train_time:1257231ms step_avg:363.36ms step:3471/6000 train_loss:3.5388 train_time:1258558ms step_avg:363.64ms step:3472/6000 train_loss:3.5785 train_time:1258811ms step_avg:363.61ms step:3473/6000 train_loss:4.2142 train_time:1259065ms step_avg:363.58ms step:3474/6000 train_loss:3.4496 
train_time:1259320ms step_avg:363.54ms step:3475/6000 train_loss:3.4588 train_time:1259573ms step_avg:363.51ms step:3476/6000 train_loss:3.4433 train_time:1259828ms step_avg:363.48ms step:3477/6000 train_loss:3.3849 train_time:1260082ms step_avg:363.45ms step:3478/6000 train_loss:3.4598 train_time:1260338ms step_avg:363.42ms step:3479/6000 train_loss:3.4577 train_time:1260592ms step_avg:363.39ms step:3480/6000 train_loss:3.3626 train_time:1260847ms step_avg:363.36ms step:3481/6000 train_loss:3.6572 train_time:1262174ms step_avg:363.63ms step:3482/6000 train_loss:3.5258 train_time:1262426ms step_avg:363.60ms step:3483/6000 train_loss:3.4722 train_time:1262680ms step_avg:363.57ms step:3484/6000 train_loss:3.4950 train_time:1262935ms step_avg:363.54ms step:3485/6000 train_loss:3.4559 train_time:1263188ms step_avg:363.51ms step:3486/6000 train_loss:3.6523 train_time:1263443ms step_avg:363.48ms step:3487/6000 train_loss:3.6698 train_time:1263697ms step_avg:363.44ms step:3488/6000 train_loss:3.5314 train_time:1263951ms step_avg:363.41ms step:3489/6000 train_loss:3.3849 train_time:1264206ms step_avg:363.38ms step:3490/6000 train_loss:3.5588 train_time:1264462ms step_avg:363.35ms step:3491/6000 train_loss:3.4656 train_time:1265789ms step_avg:363.63ms step:3492/6000 train_loss:3.5022 train_time:1266040ms step_avg:363.60ms step:3493/6000 train_loss:3.3493 train_time:1266295ms step_avg:363.56ms step:3494/6000 train_loss:3.4817 train_time:1266549ms step_avg:363.53ms step:3495/6000 train_loss:3.4307 train_time:1266803ms step_avg:363.50ms step:3496/6000 train_loss:3.4528 train_time:1267059ms step_avg:363.47ms step:3497/6000 train_loss:3.6195 train_time:1267313ms step_avg:363.44ms step:3498/6000 train_loss:3.4621 train_time:1267567ms step_avg:363.41ms step:3499/6000 train_loss:3.4801 train_time:1267822ms step_avg:363.38ms step:3500/6000 train_loss:3.4756 train_time:1268079ms step_avg:363.35ms step:3500/6000 val_loss:3.4485 train_time:1268113ms step_avg:363.36ms step:3501/6000 
train_loss:3.5205 train_time:1269408ms step_avg:363.62ms step:3502/6000 train_loss:3.5871 train_time:1269662ms step_avg:363.59ms step:3503/6000 train_loss:3.2962 train_time:1269916ms step_avg:363.56ms step:3504/6000 train_loss:3.4610 train_time:1270171ms step_avg:363.53ms step:3505/6000 train_loss:3.4841 train_time:1270428ms step_avg:363.50ms step:3506/6000 train_loss:3.5065 train_time:1270684ms step_avg:363.47ms step:3507/6000 train_loss:3.3847 train_time:1270940ms step_avg:363.44ms step:3508/6000 train_loss:3.5317 train_time:1271195ms step_avg:363.41ms step:3509/6000 train_loss:3.4255 train_time:1271452ms step_avg:363.38ms step:3510/6000 train_loss:3.6383 train_time:1271708ms step_avg:363.35ms step:3511/6000 train_loss:3.4470 train_time:1273034ms step_avg:363.62ms step:3512/6000 train_loss:3.3972 train_time:1273285ms step_avg:363.59ms step:3513/6000 train_loss:3.4520 train_time:1273538ms step_avg:363.56ms step:3514/6000 train_loss:3.4241 train_time:1273793ms step_avg:363.53ms step:3515/6000 train_loss:3.4801 train_time:1274048ms step_avg:363.49ms step:3516/6000 train_loss:3.4728 train_time:1274304ms step_avg:363.46ms step:3517/6000 train_loss:3.4445 train_time:1274559ms step_avg:363.43ms step:3518/6000 train_loss:3.4672 train_time:1274814ms step_avg:363.40ms step:3519/6000 train_loss:3.4618 train_time:1275069ms step_avg:363.37ms step:3520/6000 train_loss:3.4808 train_time:1275325ms step_avg:363.34ms step:3521/6000 train_loss:3.5499 train_time:1276651ms step_avg:363.61ms step:3522/6000 train_loss:3.4621 train_time:1276906ms step_avg:363.58ms step:3523/6000 train_loss:3.3901 train_time:1277163ms step_avg:363.55ms step:3524/6000 train_loss:3.4292 train_time:1277416ms step_avg:363.52ms step:3525/6000 train_loss:3.4178 train_time:1277671ms step_avg:363.49ms step:3526/6000 train_loss:3.4214 train_time:1277926ms step_avg:363.46ms step:3527/6000 train_loss:3.5522 train_time:1278181ms step_avg:363.43ms step:3528/6000 train_loss:3.3840 train_time:1278436ms 
step_avg:363.40ms step:3529/6000 train_loss:3.2677 train_time:1278691ms step_avg:363.37ms step:3530/6000 train_loss:3.5698 train_time:1278946ms step_avg:363.34ms step:3531/6000 train_loss:3.3725 train_time:1280273ms step_avg:363.61ms step:3532/6000 train_loss:3.4114 train_time:1280524ms step_avg:363.58ms step:3533/6000 train_loss:3.3204 train_time:1280779ms step_avg:363.55ms step:3534/6000 train_loss:3.3834 train_time:1281034ms step_avg:363.52ms step:3535/6000 train_loss:3.3563 train_time:1281288ms step_avg:363.49ms step:3536/6000 train_loss:3.5338 train_time:1281542ms step_avg:363.45ms step:3537/6000 train_loss:3.4891 train_time:1281798ms step_avg:363.42ms step:3538/6000 train_loss:3.5184 train_time:1282052ms step_avg:363.39ms step:3539/6000 train_loss:3.4169 train_time:1282306ms step_avg:363.36ms step:3540/6000 train_loss:3.3849 train_time:1282562ms step_avg:363.33ms step:3541/6000 train_loss:3.5283 train_time:1283888ms step_avg:363.60ms step:3542/6000 train_loss:3.3745 train_time:1284140ms step_avg:363.57ms step:3543/6000 train_loss:3.5896 train_time:1284395ms step_avg:363.54ms step:3544/6000 train_loss:3.7062 train_time:1284649ms step_avg:363.51ms step:3545/6000 train_loss:3.5260 train_time:1284905ms step_avg:363.48ms step:3546/6000 train_loss:3.5583 train_time:1285160ms step_avg:363.45ms step:3547/6000 train_loss:3.3199 train_time:1285414ms step_avg:363.42ms step:3548/6000 train_loss:3.4045 train_time:1285670ms step_avg:363.39ms step:3549/6000 train_loss:3.4306 train_time:1285925ms step_avg:363.36ms step:3550/6000 train_loss:3.5167 train_time:1286180ms step_avg:363.33ms step:3551/6000 train_loss:3.4840 train_time:1287506ms step_avg:363.60ms step:3552/6000 train_loss:3.4163 train_time:1287758ms step_avg:363.57ms step:3553/6000 train_loss:3.5263 train_time:1288012ms step_avg:363.54ms step:3554/6000 train_loss:3.4575 train_time:1288266ms step_avg:363.51ms step:3555/6000 train_loss:3.4167 train_time:1288521ms step_avg:363.48ms step:3556/6000 train_loss:3.3579 
train_time:1288775ms step_avg:363.44ms step:3557/6000 train_loss:3.3207 train_time:1289030ms step_avg:363.41ms step:3558/6000 train_loss:3.4368 train_time:1289285ms step_avg:363.38ms step:3559/6000 train_loss:3.4539 train_time:1289539ms step_avg:363.35ms step:3560/6000 train_loss:3.6565 train_time:1289794ms step_avg:363.32ms step:3561/6000 train_loss:3.5271 train_time:1291120ms step_avg:363.59ms step:3562/6000 train_loss:3.4317 train_time:1291371ms step_avg:363.56ms step:3563/6000 train_loss:3.3123 train_time:1291625ms step_avg:363.53ms step:3564/6000 train_loss:3.8222 train_time:1291881ms step_avg:363.50ms step:3565/6000 train_loss:3.3925 train_time:1292135ms step_avg:363.47ms step:3566/6000 train_loss:3.3402 train_time:1292391ms step_avg:363.44ms step:3567/6000 train_loss:3.3598 train_time:1292645ms step_avg:363.41ms step:3568/6000 train_loss:3.5060 train_time:1292900ms step_avg:363.38ms step:3569/6000 train_loss:3.4328 train_time:1293154ms step_avg:363.35ms step:3570/6000 train_loss:3.5487 train_time:1293408ms step_avg:363.32ms step:3571/6000 train_loss:3.4626 train_time:1294735ms step_avg:363.59ms step:3572/6000 train_loss:3.7521 train_time:1294986ms step_avg:363.56ms step:3573/6000 train_loss:3.3774 train_time:1295242ms step_avg:363.53ms step:3574/6000 train_loss:3.4561 train_time:1295496ms step_avg:363.49ms step:3575/6000 train_loss:3.6175 train_time:1295750ms step_avg:363.46ms step:3576/6000 train_loss:3.4939 train_time:1296006ms step_avg:363.43ms step:3577/6000 train_loss:3.4263 train_time:1296261ms step_avg:363.40ms step:3578/6000 train_loss:3.3984 train_time:1296515ms step_avg:363.37ms step:3579/6000 train_loss:3.4748 train_time:1296771ms step_avg:363.34ms step:3580/6000 train_loss:3.4248 train_time:1297025ms step_avg:363.31ms step:3581/6000 train_loss:3.3153 train_time:1298351ms step_avg:363.58ms step:3582/6000 train_loss:3.3964 train_time:1298607ms step_avg:363.55ms step:3583/6000 train_loss:3.3513 train_time:1298861ms step_avg:363.52ms step:3584/6000 
train_loss:3.4531 train_time:1299116ms step_avg:363.49ms step:3585/6000 train_loss:3.5531 train_time:1299370ms step_avg:363.46ms step:3586/6000 train_loss:3.3990 train_time:1299625ms step_avg:363.43ms step:3587/6000 train_loss:3.4459 train_time:1299878ms step_avg:363.40ms step:3588/6000 train_loss:3.4533 train_time:1300132ms step_avg:363.37ms step:3589/6000 train_loss:3.4310 train_time:1300389ms step_avg:363.34ms step:3590/6000 train_loss:3.4342 train_time:1300642ms step_avg:363.31ms step:3591/6000 train_loss:3.5509 train_time:1302109ms step_avg:363.62ms step:3592/6000 train_loss:3.4256 train_time:1302364ms step_avg:363.59ms step:3593/6000 train_loss:3.5072 train_time:1302618ms step_avg:363.56ms step:3594/6000 train_loss:3.4935 train_time:1302873ms step_avg:363.52ms step:3595/6000 train_loss:3.4538 train_time:1303127ms step_avg:363.49ms step:3596/6000 train_loss:3.3813 train_time:1303380ms step_avg:363.46ms step:3597/6000 train_loss:3.3829 train_time:1303634ms step_avg:363.43ms step:3598/6000 train_loss:3.6520 train_time:1303890ms step_avg:363.40ms step:3599/6000 train_loss:3.4290 train_time:1304144ms step_avg:363.37ms step:3600/6000 train_loss:3.4482 train_time:1304399ms step_avg:363.34ms step:3601/6000 train_loss:3.3060 train_time:1305725ms step_avg:363.61ms step:3602/6000 train_loss:3.4887 train_time:1305977ms step_avg:363.58ms step:3603/6000 train_loss:3.4358 train_time:1306232ms step_avg:363.55ms step:3604/6000 train_loss:3.5465 train_time:1306486ms step_avg:363.52ms step:3605/6000 train_loss:3.6358 train_time:1306741ms step_avg:363.49ms step:3606/6000 train_loss:3.4238 train_time:1306996ms step_avg:363.46ms step:3607/6000 train_loss:3.4677 train_time:1307251ms step_avg:363.43ms step:3608/6000 train_loss:3.7428 train_time:1307506ms step_avg:363.40ms step:3609/6000 train_loss:3.4277 train_time:1307765ms step_avg:363.37ms step:3610/6000 train_loss:3.5771 train_time:1308186ms step_avg:363.38ms step:3611/6000 train_loss:3.3238 train_time:1309512ms 
step_avg:363.65ms step:3612/6000 train_loss:3.4337 train_time:1309765ms step_avg:363.62ms step:3613/6000 train_loss:3.5009 train_time:1310019ms step_avg:363.59ms step:3614/6000 train_loss:3.6958 train_time:1310273ms step_avg:363.56ms step:3615/6000 train_loss:3.7097 train_time:1310527ms step_avg:363.53ms step:3616/6000 train_loss:3.3588 train_time:1310783ms step_avg:363.50ms step:3617/6000 train_loss:3.4545 train_time:1311038ms step_avg:363.47ms step:3618/6000 train_loss:3.4232 train_time:1311293ms step_avg:363.44ms step:3619/6000 train_loss:3.5570 train_time:1311547ms step_avg:363.41ms step:3620/6000 train_loss:3.4857 train_time:1311803ms step_avg:363.38ms step:3621/6000 train_loss:3.3145 train_time:1313129ms step_avg:363.65ms step:3622/6000 train_loss:3.4571 train_time:1313381ms step_avg:363.62ms step:3623/6000 train_loss:3.4555 train_time:1313635ms step_avg:363.59ms step:3624/6000 train_loss:3.3942 train_time:1313889ms step_avg:363.56ms step:3625/6000 train_loss:3.5141 train_time:1314144ms step_avg:363.53ms step:3625/6000 val_loss:3.4439 train_time:1314178ms step_avg:363.53ms step:3626/6000 train_loss:3.5886 train_time:1314402ms step_avg:363.50ms step:3627/6000 train_loss:3.5922 train_time:1314657ms step_avg:363.47ms step:3628/6000 train_loss:3.4745 train_time:1314913ms step_avg:363.44ms step:3629/6000 train_loss:3.6229 train_time:1315168ms step_avg:363.41ms step:3630/6000 train_loss:3.4561 train_time:1315425ms step_avg:363.38ms step:3631/6000 train_loss:3.4485 train_time:1316751ms step_avg:363.64ms step:3632/6000 train_loss:3.5330 train_time:1317004ms step_avg:363.61ms step:3633/6000 train_loss:3.5144 train_time:1317258ms step_avg:363.58ms step:3634/6000 train_loss:3.4379 train_time:1317513ms step_avg:363.55ms step:3635/6000 train_loss:3.4426 train_time:1317767ms step_avg:363.52ms step:3636/6000 train_loss:3.4887 train_time:1318022ms step_avg:363.49ms step:3637/6000 train_loss:3.6717 train_time:1318277ms step_avg:363.46ms step:3638/6000 train_loss:3.4673 
train_time:1318532ms step_avg:363.43ms step:3639/6000 train_loss:3.4353 train_time:1318786ms step_avg:363.40ms step:3640/6000 train_loss:3.4329 train_time:1319042ms step_avg:363.37ms step:3641/6000 train_loss:3.7111 train_time:1320367ms step_avg:363.64ms step:3642/6000 train_loss:3.4508 train_time:1320622ms step_avg:363.61ms step:3643/6000 train_loss:3.4996 train_time:1320876ms step_avg:363.58ms step:3644/6000 train_loss:3.4756 train_time:1321131ms step_avg:363.55ms step:3645/6000 train_loss:3.3962 train_time:1321385ms step_avg:363.52ms step:3646/6000 train_loss:3.5951 train_time:1321641ms step_avg:363.49ms step:3647/6000 train_loss:3.3794 train_time:1321895ms step_avg:363.46ms step:3648/6000 train_loss:3.4546 train_time:1322150ms step_avg:363.43ms step:3649/6000 train_loss:3.5259 train_time:1322405ms step_avg:363.40ms step:3650/6000 train_loss:3.4690 train_time:1322661ms step_avg:363.37ms step:3651/6000 train_loss:3.5090 train_time:1323988ms step_avg:363.63ms step:3652/6000 train_loss:3.5647 train_time:1324242ms step_avg:363.60ms step:3653/6000 train_loss:3.3877 train_time:1324497ms step_avg:363.57ms step:3654/6000 train_loss:3.4875 train_time:1324751ms step_avg:363.54ms step:3655/6000 train_loss:3.5153 train_time:1325006ms step_avg:363.51ms step:3656/6000 train_loss:4.1875 train_time:1325260ms step_avg:363.48ms step:3657/6000 train_loss:3.5721 train_time:1325515ms step_avg:363.45ms step:3658/6000 train_loss:3.4820 train_time:1325769ms step_avg:363.42ms step:3659/6000 train_loss:3.4749 train_time:1326025ms step_avg:363.39ms step:3660/6000 train_loss:3.3504 train_time:1326279ms step_avg:363.36ms step:3661/6000 train_loss:3.4782 train_time:1327606ms step_avg:363.63ms step:3662/6000 train_loss:3.3589 train_time:1327859ms step_avg:363.60ms step:3663/6000 train_loss:3.5053 train_time:1328113ms step_avg:363.57ms step:3664/6000 train_loss:3.5111 train_time:1328367ms step_avg:363.54ms step:3665/6000 train_loss:3.3494 train_time:1328621ms step_avg:363.51ms step:3666/6000 
train_loss:3.2800 train_time:1328876ms step_avg:363.48ms step:3667/6000 train_loss:3.7097 train_time:1329131ms step_avg:363.45ms step:3668/6000 train_loss:3.4965 train_time:1329384ms step_avg:363.42ms step:3669/6000 train_loss:3.5285 train_time:1329641ms step_avg:363.39ms step:3670/6000 train_loss:3.4444 train_time:1329896ms step_avg:363.36ms step:3671/6000 train_loss:3.5120 train_time:1331224ms step_avg:363.62ms step:3672/6000 train_loss:3.4090 train_time:1331474ms step_avg:363.59ms step:3673/6000 train_loss:3.4116 train_time:1331729ms step_avg:363.56ms step:3674/6000 train_loss:3.3135 train_time:1331984ms step_avg:363.53ms step:3675/6000 train_loss:3.3901 train_time:1332238ms step_avg:363.50ms step:3676/6000 train_loss:3.5420 train_time:1332493ms step_avg:363.47ms step:3677/6000 train_loss:3.3490 train_time:1332747ms step_avg:363.44ms step:3678/6000 train_loss:3.5077 train_time:1333003ms step_avg:363.41ms step:3679/6000 train_loss:3.4936 train_time:1333258ms step_avg:363.38ms step:3680/6000 train_loss:3.4015 train_time:1333513ms step_avg:363.36ms step:3681/6000 train_loss:3.4653 train_time:1334841ms step_avg:363.62ms step:3682/6000 train_loss:3.5285 train_time:1335092ms step_avg:363.59ms step:3683/6000 train_loss:3.6306 train_time:1335347ms step_avg:363.56ms step:3684/6000 train_loss:3.3712 train_time:1335602ms step_avg:363.53ms step:3685/6000 train_loss:3.4482 train_time:1335857ms step_avg:363.50ms step:3686/6000 train_loss:3.5992 train_time:1336111ms step_avg:363.47ms step:3687/6000 train_loss:3.3725 train_time:1336365ms step_avg:363.44ms step:3688/6000 train_loss:3.5797 train_time:1336623ms step_avg:363.41ms step:3689/6000 train_loss:3.3210 train_time:1336876ms step_avg:363.38ms step:3690/6000 train_loss:3.4022 train_time:1337131ms step_avg:363.35ms step:3691/6000 train_loss:3.5356 train_time:1338458ms step_avg:363.61ms step:3692/6000 train_loss:3.3259 train_time:1338710ms step_avg:363.58ms step:3693/6000 train_loss:3.4780 train_time:1338964ms 
step_avg:363.55ms step:3694/6000 train_loss:3.4610 train_time:1339219ms step_avg:363.52ms step:3695/6000 train_loss:3.4601 train_time:1339473ms step_avg:363.49ms step:3696/6000 train_loss:3.4923 train_time:1339728ms step_avg:363.46ms step:3697/6000 train_loss:3.3359 train_time:1339983ms step_avg:363.43ms step:3698/6000 train_loss:3.4694 train_time:1340238ms step_avg:363.41ms step:3699/6000 train_loss:3.4797 train_time:1340493ms step_avg:363.38ms step:3700/6000 train_loss:3.4563 train_time:1340747ms step_avg:363.35ms step:3701/6000 train_loss:3.5223 train_time:1342073ms step_avg:363.61ms step:3702/6000 train_loss:3.4861 train_time:1342325ms step_avg:363.58ms step:3703/6000 train_loss:3.3989 train_time:1342580ms step_avg:363.55ms step:3704/6000 train_loss:3.3792 train_time:1342834ms step_avg:363.52ms step:3705/6000 train_loss:3.5176 train_time:1343089ms step_avg:363.49ms step:3706/6000 train_loss:3.5210 train_time:1343345ms step_avg:363.46ms step:3707/6000 train_loss:3.5262 train_time:1343601ms step_avg:363.43ms step:3708/6000 train_loss:3.4816 train_time:1343855ms step_avg:363.40ms step:3709/6000 train_loss:3.3540 train_time:1344110ms step_avg:363.37ms step:3710/6000 train_loss:3.6528 train_time:1344364ms step_avg:363.34ms step:3711/6000 train_loss:3.2400 train_time:1345692ms step_avg:363.60ms step:3712/6000 train_loss:3.5153 train_time:1345944ms step_avg:363.57ms step:3713/6000 train_loss:3.3970 train_time:1346201ms step_avg:363.54ms step:3714/6000 train_loss:3.4372 train_time:1346454ms step_avg:363.51ms step:3715/6000 train_loss:3.8162 train_time:1346709ms step_avg:363.48ms step:3716/6000 train_loss:3.6601 train_time:1346964ms step_avg:363.46ms step:3717/6000 train_loss:3.9190 train_time:1347220ms step_avg:363.43ms step:3718/6000 train_loss:3.4236 train_time:1347475ms step_avg:363.40ms step:3719/6000 train_loss:3.3411 train_time:1347729ms step_avg:363.37ms step:3720/6000 train_loss:3.6076 train_time:1347983ms step_avg:363.34ms step:3721/6000 train_loss:3.3635 
train_time:1349311ms step_avg:363.60ms step:3722/6000 train_loss:3.4608 train_time:1349562ms step_avg:363.57ms step:3723/6000 train_loss:3.3312 train_time:1349816ms step_avg:363.54ms step:3724/6000 train_loss:3.3172 train_time:1350070ms step_avg:363.51ms step:3725/6000 train_loss:3.4336 train_time:1350325ms step_avg:363.48ms step:3726/6000 train_loss:3.3954 train_time:1350579ms step_avg:363.45ms step:3727/6000 train_loss:3.6639 train_time:1350835ms step_avg:363.42ms step:3728/6000 train_loss:3.4011 train_time:1351089ms step_avg:363.39ms step:3729/6000 train_loss:3.3833 train_time:1351345ms step_avg:363.36ms step:3730/6000 train_loss:3.7476 train_time:1351600ms step_avg:363.33ms step:3731/6000 train_loss:3.5047 train_time:1352925ms step_avg:363.59ms step:3732/6000 train_loss:3.4080 train_time:1353177ms step_avg:363.56ms step:3733/6000 train_loss:3.2521 train_time:1353431ms step_avg:363.53ms step:3734/6000 train_loss:3.5158 train_time:1353685ms step_avg:363.50ms step:3735/6000 train_loss:3.3806 train_time:1353940ms step_avg:363.47ms step:3736/6000 train_loss:3.4749 train_time:1354194ms step_avg:363.44ms step:3737/6000 train_loss:3.3881 train_time:1354449ms step_avg:363.42ms step:3738/6000 train_loss:3.4856 train_time:1354704ms step_avg:363.39ms step:3739/6000 train_loss:3.3916 train_time:1354958ms step_avg:363.36ms step:3740/6000 train_loss:3.4350 train_time:1355213ms step_avg:363.33ms step:3741/6000 train_loss:3.7355 train_time:1356540ms step_avg:363.59ms step:3742/6000 train_loss:3.3927 train_time:1356793ms step_avg:363.56ms step:3743/6000 train_loss:3.4403 train_time:1357049ms step_avg:363.53ms step:3744/6000 train_loss:3.6539 train_time:1357304ms step_avg:363.50ms step:3745/6000 train_loss:3.3753 train_time:1357558ms step_avg:363.47ms step:3746/6000 train_loss:3.3079 train_time:1357814ms step_avg:363.44ms step:3747/6000 train_loss:3.4903 train_time:1358068ms step_avg:363.41ms step:3748/6000 train_loss:3.3519 train_time:1358324ms step_avg:363.38ms step:3749/6000 
train_loss:3.3835 train_time:1358578ms step_avg:363.35ms step:3750/6000 train_loss:3.5734 train_time:1358833ms step_avg:363.32ms step:3750/6000 val_loss:3.4381 train_time:1358868ms step_avg:363.33ms step:3751/6000 train_loss:3.4617 train_time:1360163ms step_avg:363.58ms step:3752/6000 train_loss:3.6929 train_time:1360414ms step_avg:363.55ms step:3753/6000 train_loss:3.4148 train_time:1360669ms step_avg:363.52ms step:3754/6000 train_loss:3.4185 train_time:1360925ms step_avg:363.49ms step:3755/6000 train_loss:3.3830 train_time:1361179ms step_avg:363.47ms step:3756/6000 train_loss:3.4836 train_time:1361434ms step_avg:363.44ms step:3757/6000 train_loss:3.4313 train_time:1361688ms step_avg:363.41ms step:3758/6000 train_loss:3.4428 train_time:1361944ms step_avg:363.38ms step:3759/6000 train_loss:3.6198 train_time:1362199ms step_avg:363.35ms step:3760/6000 train_loss:3.5130 train_time:1362454ms step_avg:363.32ms step:3761/6000 train_loss:3.6169 train_time:1363779ms step_avg:363.58ms step:3762/6000 train_loss:3.3862 train_time:1364031ms step_avg:363.55ms step:3763/6000 train_loss:3.4007 train_time:1364286ms step_avg:363.52ms step:3764/6000 train_loss:3.5666 train_time:1364541ms step_avg:363.49ms step:3765/6000 train_loss:3.3302 train_time:1364795ms step_avg:363.46ms step:3766/6000 train_loss:3.4162 train_time:1365050ms step_avg:363.43ms step:3767/6000 train_loss:3.5111 train_time:1365305ms step_avg:363.40ms step:3768/6000 train_loss:3.3147 train_time:1365560ms step_avg:363.37ms step:3769/6000 train_loss:3.5819 train_time:1365814ms step_avg:363.34ms step:3770/6000 train_loss:3.3995 train_time:1366072ms step_avg:363.32ms step:3771/6000 train_loss:3.2738 train_time:1367398ms step_avg:363.57ms step:3772/6000 train_loss:3.5241 train_time:1367651ms step_avg:363.54ms step:3773/6000 train_loss:3.4452 train_time:1367905ms step_avg:363.51ms step:3774/6000 train_loss:3.4400 train_time:1368161ms step_avg:363.49ms step:3775/6000 train_loss:3.4400 train_time:1368415ms step_avg:363.46ms 
step:3776/6000 train_loss:3.4830 train_time:1368670ms step_avg:363.43ms step:3777/6000 train_loss:3.3211 train_time:1368925ms step_avg:363.40ms step:3778/6000 train_loss:3.4353 train_time:1369180ms step_avg:363.37ms step:3779/6000 train_loss:3.5503 train_time:1369435ms step_avg:363.34ms step:3780/6000 train_loss:3.5241 train_time:1369825ms step_avg:363.35ms step:3781/6000 train_loss:3.5241 train_time:1371152ms step_avg:363.60ms step:3782/6000 train_loss:3.4741 train_time:1371404ms step_avg:363.57ms step:3783/6000 train_loss:3.4815 train_time:1371658ms step_avg:363.55ms step:3784/6000 train_loss:3.4438 train_time:1371913ms step_avg:363.52ms step:3785/6000 train_loss:3.3348 train_time:1372168ms step_avg:363.49ms step:3786/6000 train_loss:3.4034 train_time:1372423ms step_avg:363.46ms step:3787/6000 train_loss:3.4542 train_time:1372678ms step_avg:363.43ms step:3788/6000 train_loss:3.4412 train_time:1372934ms step_avg:363.40ms step:3789/6000 train_loss:3.3882 train_time:1373188ms step_avg:363.37ms step:3790/6000 train_loss:3.4153 train_time:1373444ms step_avg:363.34ms step:3791/6000 train_loss:3.2887 train_time:1374770ms step_avg:363.60ms step:3792/6000 train_loss:3.5029 train_time:1375023ms step_avg:363.57ms step:3793/6000 train_loss:3.4955 train_time:1375278ms step_avg:363.54ms step:3794/6000 train_loss:3.4154 train_time:1375532ms step_avg:363.51ms step:3795/6000 train_loss:3.3763 train_time:1375787ms step_avg:363.48ms step:3796/6000 train_loss:3.2254 train_time:1376043ms step_avg:363.46ms step:3797/6000 train_loss:3.4087 train_time:1376296ms step_avg:363.43ms step:3798/6000 train_loss:3.4200 train_time:1376551ms step_avg:363.40ms step:3799/6000 train_loss:3.4843 train_time:1376806ms step_avg:363.37ms step:3800/6000 train_loss:3.3829 train_time:1377220ms step_avg:363.38ms step:3801/6000 train_loss:3.3817 train_time:1378548ms step_avg:363.64ms step:3802/6000 train_loss:3.3244 train_time:1378799ms step_avg:363.61ms step:3803/6000 train_loss:3.6345 train_time:1379054ms 
step_avg:363.58ms step:3804/6000 train_loss:3.4820 train_time:1379308ms step_avg:363.55ms step:3805/6000 train_loss:3.3417 train_time:1379563ms step_avg:363.52ms step:3806/6000 train_loss:3.5792 train_time:1379817ms step_avg:363.49ms step:3807/6000 train_loss:3.5437 train_time:1380072ms step_avg:363.46ms step:3808/6000 train_loss:3.4136 train_time:1380326ms step_avg:363.44ms step:3809/6000 train_loss:3.4774 train_time:1380579ms step_avg:363.41ms step:3810/6000 train_loss:3.3885 train_time:1380834ms step_avg:363.38ms step:3811/6000 train_loss:3.4604 train_time:1382160ms step_avg:363.63ms step:3812/6000 train_loss:3.4360 train_time:1382411ms step_avg:363.60ms step:3813/6000 train_loss:3.4641 train_time:1382667ms step_avg:363.57ms step:3814/6000 train_loss:3.4631 train_time:1382921ms step_avg:363.54ms step:3815/6000 train_loss:3.3557 train_time:1383176ms step_avg:363.52ms step:3816/6000 train_loss:3.6838 train_time:1383431ms step_avg:363.49ms step:3817/6000 train_loss:3.2966 train_time:1383687ms step_avg:363.46ms step:3818/6000 train_loss:3.4658 train_time:1383942ms step_avg:363.43ms step:3819/6000 train_loss:3.4470 train_time:1384196ms step_avg:363.40ms step:3820/6000 train_loss:3.4249 train_time:1384452ms step_avg:363.37ms step:3821/6000 train_loss:3.3651 train_time:1385777ms step_avg:363.63ms step:3822/6000 train_loss:3.5365 train_time:1386029ms step_avg:363.60ms step:3823/6000 train_loss:3.2673 train_time:1386284ms step_avg:363.57ms step:3824/6000 train_loss:3.3750 train_time:1386539ms step_avg:363.54ms step:3825/6000 train_loss:3.4255 train_time:1386792ms step_avg:363.51ms step:3826/6000 train_loss:3.5723 train_time:1387047ms step_avg:363.48ms step:3827/6000 train_loss:3.5154 train_time:1387302ms step_avg:363.45ms step:3828/6000 train_loss:3.8861 train_time:1387557ms step_avg:363.43ms step:3829/6000 train_loss:3.5071 train_time:1387812ms step_avg:363.40ms step:3830/6000 train_loss:3.3329 train_time:1388068ms step_avg:363.37ms step:3831/6000 train_loss:3.3790 
train_time:1389395ms step_avg:363.62ms step:3832/6000 train_loss:3.5832 train_time:1389647ms step_avg:363.59ms step:3833/6000 train_loss:3.4022 train_time:1389901ms step_avg:363.56ms step:3834/6000 train_loss:3.5238 train_time:1390156ms step_avg:363.53ms step:3835/6000 train_loss:3.4565 train_time:1390410ms step_avg:363.51ms step:3836/6000 train_loss:3.2617 train_time:1390666ms step_avg:363.48ms step:3837/6000 train_loss:3.5457 train_time:1390920ms step_avg:363.45ms step:3838/6000 train_loss:3.5164 train_time:1391175ms step_avg:363.42ms step:3839/6000 train_loss:3.4729 train_time:1391430ms step_avg:363.39ms step:3840/6000 train_loss:3.5402 train_time:1391686ms step_avg:363.36ms step:3841/6000 train_loss:3.6602 train_time:1393013ms step_avg:363.62ms step:3842/6000 train_loss:3.4197 train_time:1393264ms step_avg:363.59ms step:3843/6000 train_loss:3.4606 train_time:1393519ms step_avg:363.56ms step:3844/6000 train_loss:3.6112 train_time:1393775ms step_avg:363.53ms step:3845/6000 train_loss:3.4015 train_time:1394029ms step_avg:363.50ms step:3846/6000 train_loss:3.2708 train_time:1394285ms step_avg:363.47ms step:3847/6000 train_loss:3.5112 train_time:1394540ms step_avg:363.45ms step:3848/6000 train_loss:3.4368 train_time:1394794ms step_avg:363.42ms step:3849/6000 train_loss:3.4803 train_time:1395049ms step_avg:363.39ms step:3850/6000 train_loss:3.3710 train_time:1395304ms step_avg:363.36ms step:3851/6000 train_loss:3.3575 train_time:1396630ms step_avg:363.61ms step:3852/6000 train_loss:3.5170 train_time:1396885ms step_avg:363.58ms step:3853/6000 train_loss:3.3618 train_time:1397139ms step_avg:363.55ms step:3854/6000 train_loss:3.3363 train_time:1397392ms step_avg:363.53ms step:3855/6000 train_loss:3.4182 train_time:1397647ms step_avg:363.50ms step:3856/6000 train_loss:3.4564 train_time:1397903ms step_avg:363.47ms step:3857/6000 train_loss:3.4219 train_time:1398158ms step_avg:363.44ms step:3858/6000 train_loss:3.4476 train_time:1398414ms step_avg:363.41ms step:3859/6000 
train_loss:3.4343 train_time:1398669ms step_avg:363.39ms step:3860/6000 train_loss:3.4552 train_time:1398923ms step_avg:363.36ms step:3861/6000 train_loss:3.6066 train_time:1400249ms step_avg:363.61ms step:3862/6000 train_loss:3.4113 train_time:1400502ms step_avg:363.58ms step:3863/6000 train_loss:3.5313 train_time:1400756ms step_avg:363.55ms step:3864/6000 train_loss:3.4823 train_time:1401011ms step_avg:363.52ms step:3865/6000 train_loss:3.5344 train_time:1401265ms step_avg:363.49ms step:3866/6000 train_loss:3.5036 train_time:1401520ms step_avg:363.46ms step:3867/6000 train_loss:3.4450 train_time:1401775ms step_avg:363.44ms step:3868/6000 train_loss:3.5374 train_time:1402030ms step_avg:363.41ms step:3869/6000 train_loss:3.6802 train_time:1402285ms step_avg:363.38ms step:3870/6000 train_loss:3.5343 train_time:1402540ms step_avg:363.35ms step:3871/6000 train_loss:3.4278 train_time:1403866ms step_avg:363.60ms step:3872/6000 train_loss:3.5703 train_time:1404119ms step_avg:363.57ms step:3873/6000 train_loss:3.4722 train_time:1404374ms step_avg:363.54ms step:3874/6000 train_loss:3.4148 train_time:1404629ms step_avg:363.52ms step:3875/6000 train_loss:3.5114 train_time:1404884ms step_avg:363.49ms step:3875/6000 val_loss:3.4334 train_time:1404918ms step_avg:363.50ms step:3876/6000 train_loss:4.0536 train_time:1405142ms step_avg:363.46ms step:3877/6000 train_loss:3.4547 train_time:1405398ms step_avg:363.43ms step:3878/6000 train_loss:3.4474 train_time:1405653ms step_avg:363.41ms step:3879/6000 train_loss:3.4318 train_time:1405909ms step_avg:363.38ms step:3880/6000 train_loss:3.6339 train_time:1406163ms step_avg:363.35ms step:3881/6000 train_loss:3.4433 train_time:1407490ms step_avg:363.60ms step:3882/6000 train_loss:3.5098 train_time:1407743ms step_avg:363.57ms step:3883/6000 train_loss:3.5514 train_time:1407998ms step_avg:363.54ms step:3884/6000 train_loss:3.3731 train_time:1408252ms step_avg:363.51ms step:3885/6000 train_loss:3.3630 train_time:1408507ms step_avg:363.49ms 
step:3886/6000 train_loss:3.4022 train_time:1408762ms step_avg:363.46ms step:3887/6000 train_loss:3.4372 train_time:1409019ms step_avg:363.43ms step:3888/6000 train_loss:3.6232 train_time:1409274ms step_avg:363.40ms step:3889/6000 train_loss:3.4773 train_time:1409528ms step_avg:363.37ms step:3890/6000 train_loss:3.3999 train_time:1409783ms step_avg:363.35ms step:3891/6000 train_loss:3.5454 train_time:1411109ms step_avg:363.59ms step:3892/6000 train_loss:3.4108 train_time:1411361ms step_avg:363.57ms step:3893/6000 train_loss:3.6483 train_time:1411616ms step_avg:363.54ms step:3894/6000 train_loss:3.3873 train_time:1411870ms step_avg:363.51ms step:3895/6000 train_loss:3.4047 train_time:1412126ms step_avg:363.48ms step:3896/6000 train_loss:3.4842 train_time:1412381ms step_avg:363.45ms step:3897/6000 train_loss:3.7263 train_time:1412635ms step_avg:363.43ms step:3898/6000 train_loss:3.2840 train_time:1412889ms step_avg:363.40ms step:3899/6000 train_loss:3.4048 train_time:1413144ms step_avg:363.37ms step:3900/6000 train_loss:3.5427 train_time:1413399ms step_avg:363.34ms step:3901/6000 train_loss:3.4714 train_time:1414726ms step_avg:363.59ms step:3902/6000 train_loss:3.5058 train_time:1414981ms step_avg:363.56ms step:3903/6000 train_loss:3.7975 train_time:1415235ms step_avg:363.53ms step:3904/6000 train_loss:3.3922 train_time:1415489ms step_avg:363.51ms step:3905/6000 train_loss:3.4147 train_time:1415743ms step_avg:363.48ms step:3906/6000 train_loss:3.3640 train_time:1415998ms step_avg:363.45ms step:3907/6000 train_loss:3.5215 train_time:1416252ms step_avg:363.42ms step:3908/6000 train_loss:3.5387 train_time:1416508ms step_avg:363.39ms step:3909/6000 train_loss:3.5304 train_time:1416762ms step_avg:363.37ms step:3910/6000 train_loss:3.4786 train_time:1417018ms step_avg:363.34ms step:3911/6000 train_loss:3.4121 train_time:1418344ms step_avg:363.58ms step:3912/6000 train_loss:3.4377 train_time:1418596ms step_avg:363.56ms step:3913/6000 train_loss:3.4218 train_time:1418852ms 
step_avg:363.53ms step:3914/6000 train_loss:3.5436 train_time:1419107ms step_avg:363.50ms step:3915/6000 train_loss:3.3784 train_time:1419362ms step_avg:363.47ms step:3916/6000 train_loss:3.3572 train_time:1419616ms step_avg:363.45ms step:3917/6000 train_loss:3.3477 train_time:1419870ms step_avg:363.42ms step:3918/6000 train_loss:3.4730 train_time:1420125ms step_avg:363.39ms step:3919/6000 train_loss:3.5965 train_time:1420380ms step_avg:363.36ms step:3920/6000 train_loss:3.3803 train_time:1420634ms step_avg:363.33ms step:3921/6000 train_loss:3.3522 train_time:1421962ms step_avg:363.58ms step:3922/6000 train_loss:3.4321 train_time:1422214ms step_avg:363.55ms step:3923/6000 train_loss:3.4235 train_time:1422469ms step_avg:363.52ms step:3924/6000 train_loss:3.4404 train_time:1422724ms step_avg:363.50ms step:3925/6000 train_loss:3.5221 train_time:1422979ms step_avg:363.47ms step:3926/6000 train_loss:3.4858 train_time:1423234ms step_avg:363.44ms step:3927/6000 train_loss:3.5840 train_time:1423489ms step_avg:363.41ms step:3928/6000 train_loss:3.4644 train_time:1423743ms step_avg:363.39ms step:3929/6000 train_loss:3.3187 train_time:1423998ms step_avg:363.36ms step:3930/6000 train_loss:3.6436 train_time:1424252ms step_avg:363.33ms step:3931/6000 train_loss:3.4286 train_time:1425581ms step_avg:363.58ms step:3932/6000 train_loss:3.4771 train_time:1425833ms step_avg:363.55ms step:3933/6000 train_loss:3.5173 train_time:1426089ms step_avg:363.52ms step:3934/6000 train_loss:3.4001 train_time:1426342ms step_avg:363.49ms step:3935/6000 train_loss:3.5218 train_time:1426598ms step_avg:363.46ms step:3936/6000 train_loss:3.5279 train_time:1426852ms step_avg:363.44ms step:3937/6000 train_loss:3.4525 train_time:1427107ms step_avg:363.41ms step:3938/6000 train_loss:3.5133 train_time:1427361ms step_avg:363.38ms step:3939/6000 train_loss:3.4353 train_time:1427617ms step_avg:363.35ms step:3940/6000 train_loss:3.2119 train_time:1427871ms step_avg:363.33ms step:3941/6000 train_loss:3.4138 
train_time:1429198ms step_avg:363.57ms step:3942/6000 train_loss:3.5180 train_time:1429449ms step_avg:363.54ms step:3943/6000 train_loss:3.6053 train_time:1429703ms step_avg:363.51ms step:3944/6000 train_loss:3.6516 train_time:1429957ms step_avg:363.49ms step:3945/6000 train_loss:3.4836 train_time:1430213ms step_avg:363.46ms step:3946/6000 train_loss:3.3918 train_time:1430467ms step_avg:363.43ms step:3947/6000 train_loss:3.4137 train_time:1430721ms step_avg:363.40ms step:3948/6000 train_loss:3.4948 train_time:1430976ms step_avg:363.38ms step:3949/6000 train_loss:3.2985 train_time:1431230ms step_avg:363.35ms step:3950/6000 train_loss:3.4982 train_time:1431484ms step_avg:363.32ms step:3951/6000 train_loss:3.4316 train_time:1432810ms step_avg:363.57ms step:3952/6000 train_loss:3.2486 train_time:1433062ms step_avg:363.54ms step:3953/6000 train_loss:3.3061 train_time:1433319ms step_avg:363.51ms step:3954/6000 train_loss:3.5743 train_time:1433573ms step_avg:363.48ms step:3955/6000 train_loss:3.4935 train_time:1433829ms step_avg:363.45ms step:3956/6000 train_loss:3.4274 train_time:1434084ms step_avg:363.43ms step:3957/6000 train_loss:3.4969 train_time:1434337ms step_avg:363.40ms step:3958/6000 train_loss:3.2121 train_time:1434591ms step_avg:363.37ms step:3959/6000 train_loss:3.5010 train_time:1434845ms step_avg:363.34ms step:3960/6000 train_loss:3.4506 train_time:1435100ms step_avg:363.32ms step:3961/6000 train_loss:3.4120 train_time:1436426ms step_avg:363.56ms step:3962/6000 train_loss:3.4354 train_time:1436680ms step_avg:363.53ms step:3963/6000 train_loss:3.4525 train_time:1436933ms step_avg:363.50ms step:3964/6000 train_loss:3.4736 train_time:1437189ms step_avg:363.48ms step:3965/6000 train_loss:3.3340 train_time:1437443ms step_avg:363.45ms step:3966/6000 train_loss:3.4529 train_time:1437698ms step_avg:363.42ms step:3967/6000 train_loss:3.5279 train_time:1437953ms step_avg:363.39ms step:3968/6000 train_loss:3.4336 train_time:1438207ms step_avg:363.37ms step:3969/6000 
train_loss:3.5243 train_time:1438604ms step_avg:363.38ms step:3970/6000 train_loss:3.4094 train_time:1438858ms step_avg:363.35ms step:3971/6000 train_loss:3.5996 train_time:1440184ms step_avg:363.59ms step:3972/6000 train_loss:3.5273 train_time:1440435ms step_avg:363.56ms step:3973/6000 train_loss:3.4762 train_time:1440690ms step_avg:363.54ms step:3974/6000 train_loss:3.3733 train_time:1440944ms step_avg:363.51ms step:3975/6000 train_loss:3.4199 train_time:1441199ms step_avg:363.48ms step:3976/6000 train_loss:3.4911 train_time:1441452ms step_avg:363.45ms step:3977/6000 train_loss:3.4020 train_time:1441709ms step_avg:363.43ms step:3978/6000 train_loss:3.4599 train_time:1441964ms step_avg:363.40ms step:3979/6000 train_loss:3.5446 train_time:1442219ms step_avg:363.37ms step:3980/6000 train_loss:3.4747 train_time:1442476ms step_avg:363.34ms step:3981/6000 train_loss:3.4908 train_time:1443804ms step_avg:363.59ms step:3982/6000 train_loss:3.6918 train_time:1444055ms step_avg:363.56ms step:3983/6000 train_loss:3.4178 train_time:1444311ms step_avg:363.53ms step:3984/6000 train_loss:3.4869 train_time:1444565ms step_avg:363.50ms step:3985/6000 train_loss:3.4231 train_time:1444820ms step_avg:363.48ms step:3986/6000 train_loss:3.3459 train_time:1445074ms step_avg:363.45ms step:3987/6000 train_loss:3.3859 train_time:1445329ms step_avg:363.42ms step:3988/6000 train_loss:3.4133 train_time:1445584ms step_avg:363.39ms step:3989/6000 train_loss:3.1368 train_time:1445838ms step_avg:363.37ms step:3990/6000 train_loss:3.4672 train_time:1446257ms step_avg:363.38ms step:3991/6000 train_loss:3.4335 train_time:1447583ms step_avg:363.62ms step:3992/6000 train_loss:3.2887 train_time:1447834ms step_avg:363.59ms step:3993/6000 train_loss:3.3936 train_time:1448090ms step_avg:363.57ms step:3994/6000 train_loss:3.5879 train_time:1448344ms step_avg:363.54ms step:3995/6000 train_loss:3.4090 train_time:1448600ms step_avg:363.51ms step:3996/6000 train_loss:3.3250 train_time:1448854ms 
step_avg:363.49ms step:3997/6000 train_loss:3.4787 train_time:1449109ms step_avg:363.46ms step:3998/6000 train_loss:3.3865 train_time:1449363ms step_avg:363.43ms step:3999/6000 train_loss:3.3502 train_time:1449618ms step_avg:363.40ms step:4000/6000 train_loss:3.4283 train_time:1449872ms step_avg:363.38ms step:4000/6000 val_loss:3.4265 train_time:1449907ms step_avg:363.39ms step:4001/6000 train_loss:3.5433 train_time:1451204ms step_avg:363.62ms step:4002/6000 train_loss:3.6162 train_time:1451457ms step_avg:363.59ms step:4003/6000 train_loss:3.2873 train_time:1451712ms step_avg:363.56ms step:4004/6000 train_loss:3.4833 train_time:1451966ms step_avg:363.54ms step:4005/6000 train_loss:3.3883 train_time:1452220ms step_avg:363.51ms step:4006/6000 train_loss:3.4384 train_time:1452474ms step_avg:363.48ms step:4007/6000 train_loss:3.4253 train_time:1452730ms step_avg:363.46ms step:4008/6000 train_loss:3.6176 train_time:1452985ms step_avg:363.43ms step:4009/6000 train_loss:3.2082 train_time:1453240ms step_avg:363.40ms step:4010/6000 train_loss:3.4120 train_time:1453495ms step_avg:363.37ms step:4011/6000 train_loss:3.3965 train_time:1454821ms step_avg:363.61ms step:4012/6000 train_loss:3.3551 train_time:1455074ms step_avg:363.59ms step:4013/6000 train_loss:3.5225 train_time:1455328ms step_avg:363.56ms step:4014/6000 train_loss:3.3929 train_time:1455582ms step_avg:363.53ms step:4015/6000 train_loss:3.4892 train_time:1455838ms step_avg:363.51ms step:4016/6000 train_loss:3.5901 train_time:1456092ms step_avg:363.48ms step:4017/6000 train_loss:3.5651 train_time:1456347ms step_avg:363.45ms step:4018/6000 train_loss:3.3215 train_time:1456601ms step_avg:363.42ms step:4019/6000 train_loss:3.4589 train_time:1456856ms step_avg:363.40ms step:4020/6000 train_loss:3.3850 train_time:1457110ms step_avg:363.37ms step:4021/6000 train_loss:3.6462 train_time:1458437ms step_avg:363.61ms step:4022/6000 train_loss:3.5203 train_time:1458690ms step_avg:363.58ms step:4023/6000 train_loss:3.5091 
train_time:1458944ms step_avg:363.55ms step:4024/6000 train_loss:3.4788 train_time:1459199ms step_avg:363.53ms step:4025/6000 train_loss:3.5067 train_time:1459454ms step_avg:363.50ms step:4026/6000 train_loss:3.2623 train_time:1459708ms step_avg:363.47ms step:4027/6000 train_loss:3.4833 train_time:1459963ms step_avg:363.45ms step:4028/6000 train_loss:3.4368 train_time:1460218ms step_avg:363.42ms step:4029/6000 train_loss:3.3183 train_time:1460474ms step_avg:363.39ms step:4030/6000 train_loss:3.3420 train_time:1460728ms step_avg:363.37ms step:4031/6000 train_loss:3.3981 train_time:1462055ms step_avg:363.60ms step:4032/6000 train_loss:3.4878 train_time:1462306ms step_avg:363.58ms step:4033/6000 train_loss:3.4532 train_time:1462561ms step_avg:363.55ms step:4034/6000 train_loss:3.4347 train_time:1462815ms step_avg:363.52ms step:4035/6000 train_loss:3.4361 train_time:1463069ms step_avg:363.50ms step:4036/6000 train_loss:3.3679 train_time:1463323ms step_avg:363.47ms step:4037/6000 train_loss:3.5421 train_time:1463579ms step_avg:363.44ms step:4038/6000 train_loss:3.4677 train_time:1463834ms step_avg:363.41ms step:4039/6000 train_loss:3.4507 train_time:1464088ms step_avg:363.39ms step:4040/6000 train_loss:3.4460 train_time:1464343ms step_avg:363.36ms step:4041/6000 train_loss:3.4831 train_time:1465670ms step_avg:363.60ms step:4042/6000 train_loss:3.6774 train_time:1465922ms step_avg:363.57ms step:4043/6000 train_loss:3.5823 train_time:1466177ms step_avg:363.55ms step:4044/6000 train_loss:3.3547 train_time:1466432ms step_avg:363.52ms step:4045/6000 train_loss:3.5330 train_time:1466686ms step_avg:363.49ms step:4046/6000 train_loss:3.2351 train_time:1466940ms step_avg:363.46ms step:4047/6000 train_loss:3.4953 train_time:1467196ms step_avg:363.44ms step:4048/6000 train_loss:3.5790 train_time:1467453ms step_avg:363.41ms step:4049/6000 train_loss:3.4588 train_time:1467707ms step_avg:363.38ms step:4050/6000 train_loss:3.3883 train_time:1467962ms step_avg:363.36ms step:4051/6000 
train_loss:3.4394 train_time:1469289ms step_avg:363.60ms step:4052/6000 train_loss:3.3570 train_time:1469541ms step_avg:363.57ms step:4053/6000 train_loss:3.5637 train_time:1469796ms step_avg:363.54ms step:4054/6000 train_loss:3.4250 train_time:1470051ms step_avg:363.51ms step:4055/6000 train_loss:3.5046 train_time:1470305ms step_avg:363.49ms step:4056/6000 train_loss:3.4965 train_time:1470560ms step_avg:363.46ms step:4057/6000 train_loss:3.4562 train_time:1470814ms step_avg:363.43ms step:4058/6000 train_loss:3.3245 train_time:1471069ms step_avg:363.41ms step:4059/6000 train_loss:3.4800 train_time:1471325ms step_avg:363.38ms step:4060/6000 train_loss:3.3441 train_time:1471579ms step_avg:363.35ms step:4061/6000 train_loss:3.4218 train_time:1472905ms step_avg:363.59ms step:4062/6000 train_loss:3.5399 train_time:1473159ms step_avg:363.56ms step:4063/6000 train_loss:3.7287 train_time:1473413ms step_avg:363.54ms step:4064/6000 train_loss:3.1014 train_time:1473668ms step_avg:363.51ms step:4065/6000 train_loss:3.4638 train_time:1473922ms step_avg:363.48ms step:4066/6000 train_loss:3.3429 train_time:1474176ms step_avg:363.46ms step:4067/6000 train_loss:3.5102 train_time:1474431ms step_avg:363.43ms step:4068/6000 train_loss:3.5107 train_time:1474684ms step_avg:363.40ms step:4069/6000 train_loss:3.3112 train_time:1474940ms step_avg:363.38ms step:4070/6000 train_loss:3.4865 train_time:1475194ms step_avg:363.35ms step:4071/6000 train_loss:3.2953 train_time:1476522ms step_avg:363.59ms step:4072/6000 train_loss:3.4809 train_time:1476774ms step_avg:363.56ms step:4073/6000 train_loss:3.5863 train_time:1477028ms step_avg:363.53ms step:4074/6000 train_loss:3.5181 train_time:1477282ms step_avg:363.50ms step:4075/6000 train_loss:3.4231 train_time:1477538ms step_avg:363.48ms step:4076/6000 train_loss:3.4221 train_time:1477791ms step_avg:363.45ms step:4077/6000 train_loss:3.2994 train_time:1478045ms step_avg:363.42ms step:4078/6000 train_loss:3.4615 train_time:1478300ms 
step_avg:363.40ms step:4079/6000 train_loss:3.4826 train_time:1478555ms step_avg:363.37ms step:4080/6000 train_loss:3.2771 train_time:1478809ms step_avg:363.34ms step:4081/6000 train_loss:3.4462 train_time:1480136ms step_avg:363.58ms step:4082/6000 train_loss:3.3922 train_time:1480389ms step_avg:363.55ms step:4083/6000 train_loss:3.4564 train_time:1480644ms step_avg:363.53ms step:4084/6000 train_loss:3.4501 train_time:1480898ms step_avg:363.50ms step:4085/6000 train_loss:3.4856 train_time:1481153ms step_avg:363.47ms step:4086/6000 train_loss:3.4477 train_time:1481406ms step_avg:363.45ms step:4087/6000 train_loss:3.4195 train_time:1481662ms step_avg:363.42ms step:4088/6000 train_loss:3.5297 train_time:1481917ms step_avg:363.39ms step:4089/6000 train_loss:3.3751 train_time:1482171ms step_avg:363.37ms step:4090/6000 train_loss:3.3919 train_time:1482426ms step_avg:363.34ms step:4091/6000 train_loss:3.4236 train_time:1483753ms step_avg:363.58ms step:4092/6000 train_loss:3.3673 train_time:1484004ms step_avg:363.55ms step:4093/6000 train_loss:3.3540 train_time:1484260ms step_avg:363.52ms step:4094/6000 train_loss:3.5425 train_time:1484514ms step_avg:363.50ms step:4095/6000 train_loss:3.5267 train_time:1484768ms step_avg:363.47ms step:4096/6000 train_loss:3.4381 train_time:1485023ms step_avg:363.44ms step:4097/6000 train_loss:3.5059 train_time:1485281ms step_avg:363.42ms step:4098/6000 train_loss:3.2748 train_time:1485535ms step_avg:363.39ms step:4099/6000 train_loss:3.4201 train_time:1485790ms step_avg:363.36ms step:4100/6000 train_loss:3.3876 train_time:1486045ms step_avg:363.34ms step:4101/6000 train_loss:3.1922 train_time:1487371ms step_avg:363.57ms step:4102/6000 train_loss:3.4642 train_time:1487623ms step_avg:363.54ms step:4103/6000 train_loss:3.4197 train_time:1487878ms step_avg:363.52ms step:4104/6000 train_loss:3.2577 train_time:1488133ms step_avg:363.49ms step:4105/6000 train_loss:3.3523 train_time:1488387ms step_avg:363.46ms step:4106/6000 train_loss:3.5055 
train_time:1488641ms step_avg:363.44ms step:4107/6000 train_loss:3.5564 train_time:1488897ms step_avg:363.41ms step:4108/6000 train_loss:3.4504 train_time:1489150ms step_avg:363.38ms step:4109/6000 train_loss:3.5390 train_time:1489405ms step_avg:363.36ms step:4110/6000 train_loss:3.5336 train_time:1489660ms step_avg:363.33ms step:4111/6000 train_loss:3.7137 train_time:1490986ms step_avg:363.57ms step:4112/6000 train_loss:3.3493 train_time:1491240ms step_avg:363.54ms step:4113/6000 train_loss:3.4856 train_time:1491494ms step_avg:363.51ms step:4114/6000 train_loss:3.3857 train_time:1491748ms step_avg:363.49ms step:4115/6000 train_loss:3.4927 train_time:1492003ms step_avg:363.46ms step:4116/6000 train_loss:3.4902 train_time:1492259ms step_avg:363.43ms step:4117/6000 train_loss:3.7354 train_time:1492512ms step_avg:363.41ms step:4118/6000 train_loss:3.3002 train_time:1492767ms step_avg:363.38ms step:4119/6000 train_loss:3.4705 train_time:1493023ms step_avg:363.35ms step:4120/6000 train_loss:3.3825 train_time:1493279ms step_avg:363.33ms step:4121/6000 train_loss:3.4953 train_time:1494605ms step_avg:363.56ms step:4122/6000 train_loss:3.4897 train_time:1494859ms step_avg:363.54ms step:4123/6000 train_loss:3.4779 train_time:1495113ms step_avg:363.51ms step:4124/6000 train_loss:3.3032 train_time:1495368ms step_avg:363.48ms step:4125/6000 train_loss:3.3182 train_time:1495623ms step_avg:363.46ms step:4125/6000 val_loss:3.4221 train_time:1495657ms step_avg:363.46ms step:4126/6000 train_loss:3.4417 train_time:1495883ms step_avg:363.43ms step:4127/6000 train_loss:3.3790 train_time:1496138ms step_avg:363.40ms step:4128/6000 train_loss:3.4501 train_time:1496392ms step_avg:363.38ms step:4129/6000 train_loss:3.4473 train_time:1496647ms step_avg:363.35ms step:4130/6000 train_loss:3.2232 train_time:1496902ms step_avg:363.33ms step:4131/6000 train_loss:3.5225 train_time:1498229ms step_avg:363.56ms step:4132/6000 train_loss:3.4750 train_time:1498481ms step_avg:363.53ms step:4133/6000 
train_loss:3.3986 train_time:1498738ms step_avg:363.51ms step:4134/6000 train_loss:3.5980 train_time:1498992ms step_avg:363.48ms step:4135/6000 train_loss:3.4271 train_time:1499247ms step_avg:363.45ms step:4136/6000 train_loss:3.4062 train_time:1499501ms step_avg:363.43ms step:4137/6000 train_loss:3.5578 train_time:1499755ms step_avg:363.40ms step:4138/6000 train_loss:3.3956 train_time:1500011ms step_avg:363.37ms step:4139/6000 train_loss:3.4541 train_time:1500265ms step_avg:363.35ms step:4140/6000 train_loss:3.5454 train_time:1500521ms step_avg:363.32ms step:4141/6000 train_loss:3.5789 train_time:1501848ms step_avg:363.56ms step:4142/6000 train_loss:3.5422 train_time:1502099ms step_avg:363.53ms step:4143/6000 train_loss:3.5212 train_time:1502354ms step_avg:363.50ms step:4144/6000 train_loss:3.4174 train_time:1502610ms step_avg:363.48ms step:4145/6000 train_loss:3.3893 train_time:1502864ms step_avg:363.45ms step:4146/6000 train_loss:3.4967 train_time:1503119ms step_avg:363.42ms step:4147/6000 train_loss:3.0599 train_time:1503374ms step_avg:363.40ms step:4148/6000 train_loss:3.4087 train_time:1503630ms step_avg:363.37ms step:4149/6000 train_loss:3.4513 train_time:1503885ms step_avg:363.34ms step:4150/6000 train_loss:3.2592 train_time:1504138ms step_avg:363.32ms step:4151/6000 train_loss:3.3020 train_time:1505465ms step_avg:363.55ms step:4152/6000 train_loss:3.3405 train_time:1505716ms step_avg:363.52ms step:4153/6000 train_loss:3.3924 train_time:1505971ms step_avg:363.50ms step:4154/6000 train_loss:3.4474 train_time:1506225ms step_avg:363.47ms step:4155/6000 train_loss:3.5510 train_time:1506480ms step_avg:363.45ms step:4156/6000 train_loss:3.3694 train_time:1506735ms step_avg:363.42ms step:4157/6000 train_loss:3.2931 train_time:1506990ms step_avg:363.39ms step:4158/6000 train_loss:3.4364 train_time:1507386ms step_avg:363.40ms step:4159/6000 train_loss:3.4431 train_time:1507640ms step_avg:363.37ms step:4160/6000 train_loss:3.3619 train_time:1507895ms 
step_avg:363.35ms step:4161/6000 train_loss:3.4509 train_time:1509222ms step_avg:363.58ms step:4162/6000 train_loss:3.3800 train_time:1509475ms step_avg:363.55ms step:4163/6000 train_loss:3.5999 train_time:1509731ms step_avg:363.53ms step:4164/6000 train_loss:3.2859 train_time:1509985ms step_avg:363.50ms step:4165/6000 train_loss:3.3942 train_time:1510239ms step_avg:363.48ms step:4166/6000 train_loss:3.3829 train_time:1510495ms step_avg:363.45ms step:4167/6000 train_loss:3.4237 train_time:1510750ms step_avg:363.42ms step:4168/6000 train_loss:3.4070 train_time:1511007ms step_avg:363.40ms step:4169/6000 train_loss:3.4373 train_time:1511262ms step_avg:363.37ms step:4170/6000 train_loss:3.2761 train_time:1511517ms step_avg:363.35ms step:4171/6000 train_loss:3.3781 train_time:1512843ms step_avg:363.58ms step:4172/6000 train_loss:3.5083 train_time:1513096ms step_avg:363.55ms step:4173/6000 train_loss:3.5717 train_time:1513351ms step_avg:363.52ms step:4174/6000 train_loss:3.9467 train_time:1513607ms step_avg:363.50ms step:4175/6000 train_loss:3.3774 train_time:1513861ms step_avg:363.47ms step:4176/6000 train_loss:3.5381 train_time:1514116ms step_avg:363.45ms step:4177/6000 train_loss:3.3438 train_time:1514371ms step_avg:363.42ms step:4178/6000 train_loss:3.3687 train_time:1514625ms step_avg:363.39ms step:4179/6000 train_loss:3.5241 train_time:1514880ms step_avg:363.37ms step:4180/6000 train_loss:3.4754 train_time:1515302ms step_avg:363.38ms step:4181/6000 train_loss:3.4652 train_time:1516628ms step_avg:363.61ms step:4182/6000 train_loss:3.4610 train_time:1516880ms step_avg:363.59ms step:4183/6000 train_loss:3.4950 train_time:1517134ms step_avg:363.56ms step:4184/6000 train_loss:3.9325 train_time:1517389ms step_avg:363.53ms step:4185/6000 train_loss:3.4383 train_time:1517642ms step_avg:363.51ms step:4186/6000 train_loss:3.4916 train_time:1517898ms step_avg:363.48ms step:4187/6000 train_loss:3.5324 train_time:1518153ms step_avg:363.46ms step:4188/6000 train_loss:3.5200 
train_time:1518408ms step_avg:363.43ms step:4189/6000 train_loss:3.1702 train_time:1518662ms step_avg:363.40ms step:4190/6000 train_loss:3.5164 train_time:1518917ms step_avg:363.38ms step:4191/6000 train_loss:3.5252 train_time:1520245ms step_avg:363.61ms step:4192/6000 train_loss:3.5088 train_time:1520497ms step_avg:363.58ms step:4193/6000 train_loss:3.4289 train_time:1520751ms step_avg:363.56ms step:4194/6000 train_loss:3.4477 train_time:1521006ms step_avg:363.53ms step:4195/6000 train_loss:3.4348 train_time:1521260ms step_avg:363.50ms step:4196/6000 train_loss:3.3719 train_time:1521515ms step_avg:363.48ms step:4197/6000 train_loss:3.7337 train_time:1521769ms step_avg:363.45ms step:4198/6000 train_loss:3.1744 train_time:1522023ms step_avg:363.42ms step:4199/6000 train_loss:3.5673 train_time:1522279ms step_avg:363.40ms step:4200/6000 train_loss:3.4469 train_time:1522534ms step_avg:363.37ms step:4201/6000 train_loss:3.3285 train_time:1523862ms step_avg:363.60ms step:4202/6000 train_loss:3.4752 train_time:1524115ms step_avg:363.58ms step:4203/6000 train_loss:3.3315 train_time:1524369ms step_avg:363.55ms step:4204/6000 train_loss:3.3508 train_time:1524626ms step_avg:363.53ms step:4205/6000 train_loss:3.3693 train_time:1524881ms step_avg:363.50ms step:4206/6000 train_loss:3.3586 train_time:1525136ms step_avg:363.47ms step:4207/6000 train_loss:3.8313 train_time:1525390ms step_avg:363.45ms step:4208/6000 train_loss:3.3955 train_time:1525643ms step_avg:363.42ms step:4209/6000 train_loss:3.5313 train_time:1525899ms step_avg:363.40ms step:4210/6000 train_loss:3.4297 train_time:1526153ms step_avg:363.37ms step:4211/6000 train_loss:3.8087 train_time:1527481ms step_avg:363.60ms step:4212/6000 train_loss:3.4693 train_time:1527733ms step_avg:363.57ms step:4213/6000 train_loss:3.4654 train_time:1527988ms step_avg:363.55ms step:4214/6000 train_loss:3.3386 train_time:1528242ms step_avg:363.52ms step:4215/6000 train_loss:3.4040 train_time:1528496ms step_avg:363.49ms step:4216/6000 
train_loss:3.4786 train_time:1528750ms step_avg:363.47ms step:4217/6000 train_loss:3.3355 train_time:1529005ms step_avg:363.44ms step:4218/6000 train_loss:3.4025 train_time:1529259ms step_avg:363.42ms step:4219/6000 train_loss:3.4537 train_time:1529515ms step_avg:363.39ms step:4220/6000 train_loss:3.2585 train_time:1529771ms step_avg:363.37ms step:4221/6000 train_loss:3.4316 train_time:1531096ms step_avg:363.59ms step:4222/6000 train_loss:3.4579 train_time:1531350ms step_avg:363.57ms step:4223/6000 train_loss:3.4186 train_time:1531605ms step_avg:363.54ms step:4224/6000 train_loss:3.6306 train_time:1531859ms step_avg:363.52ms step:4225/6000 train_loss:3.5073 train_time:1532113ms step_avg:363.49ms step:4226/6000 train_loss:3.5633 train_time:1532367ms step_avg:363.46ms step:4227/6000 train_loss:3.3366 train_time:1532622ms step_avg:363.44ms step:4228/6000 train_loss:3.4060 train_time:1532876ms step_avg:363.41ms step:4229/6000 train_loss:3.4444 train_time:1533133ms step_avg:363.39ms step:4230/6000 train_loss:3.3489 train_time:1533387ms step_avg:363.36ms step:4231/6000 train_loss:3.5407 train_time:1534713ms step_avg:363.59ms step:4232/6000 train_loss:3.5496 train_time:1534965ms step_avg:363.56ms step:4233/6000 train_loss:3.5376 train_time:1535219ms step_avg:363.54ms step:4234/6000 train_loss:3.6533 train_time:1535474ms step_avg:363.51ms step:4235/6000 train_loss:3.5003 train_time:1535729ms step_avg:363.49ms step:4236/6000 train_loss:3.4426 train_time:1535985ms step_avg:363.46ms step:4237/6000 train_loss:3.2936 train_time:1536239ms step_avg:363.43ms step:4238/6000 train_loss:3.5060 train_time:1536494ms step_avg:363.41ms step:4239/6000 train_loss:3.4187 train_time:1536748ms step_avg:363.38ms step:4240/6000 train_loss:3.3404 train_time:1537003ms step_avg:363.36ms step:4241/6000 train_loss:3.3826 train_time:1538330ms step_avg:363.59ms step:4242/6000 train_loss:3.3301 train_time:1538582ms step_avg:363.56ms step:4243/6000 train_loss:3.4062 train_time:1538835ms 
step_avg:363.53ms step:4244/6000 train_loss:3.3324 train_time:1539090ms step_avg:363.51ms step:4245/6000 train_loss:3.2391 train_time:1539344ms step_avg:363.48ms step:4246/6000 train_loss:3.5457 train_time:1539599ms step_avg:363.46ms step:4247/6000 train_loss:3.3491 train_time:1539853ms step_avg:363.43ms step:4248/6000 train_loss:3.2714 train_time:1540109ms step_avg:363.40ms step:4249/6000 train_loss:3.4914 train_time:1540362ms step_avg:363.38ms step:4250/6000 train_loss:3.8039 train_time:1540619ms step_avg:363.35ms step:4250/6000 val_loss:3.4183 train_time:1540653ms step_avg:363.36ms step:4251/6000 train_loss:3.4037 train_time:1541949ms step_avg:363.58ms step:4252/6000 train_loss:3.6500 train_time:1542201ms step_avg:363.56ms step:4253/6000 train_loss:3.4971 train_time:1542457ms step_avg:363.53ms step:4254/6000 train_loss:3.3055 train_time:1542712ms step_avg:363.50ms step:4255/6000 train_loss:3.3802 train_time:1542967ms step_avg:363.48ms step:4256/6000 train_loss:3.3038 train_time:1543221ms step_avg:363.45ms step:4257/6000 train_loss:3.5424 train_time:1543478ms step_avg:363.43ms step:4258/6000 train_loss:3.4287 train_time:1543734ms step_avg:363.40ms step:4259/6000 train_loss:3.4750 train_time:1543988ms step_avg:363.38ms step:4260/6000 train_loss:3.3063 train_time:1544244ms step_avg:363.35ms step:4261/6000 train_loss:3.5994 train_time:1545570ms step_avg:363.58ms step:4262/6000 train_loss:3.4261 train_time:1545822ms step_avg:363.55ms step:4263/6000 train_loss:3.4376 train_time:1546078ms step_avg:363.53ms step:4264/6000 train_loss:3.4901 train_time:1546333ms step_avg:363.50ms step:4265/6000 train_loss:3.4311 train_time:1546587ms step_avg:363.48ms step:4266/6000 train_loss:3.4376 train_time:1546842ms step_avg:363.45ms step:4267/6000 train_loss:3.5572 train_time:1547096ms step_avg:363.42ms step:4268/6000 train_loss:3.3936 train_time:1547352ms step_avg:363.40ms step:4269/6000 train_loss:3.9226 train_time:1547606ms step_avg:363.37ms step:4270/6000 train_loss:3.3610 
train_time:1547861ms step_avg:363.35ms step:4271/6000 train_loss:3.4717 train_time:1549187ms step_avg:363.57ms step:4272/6000 train_loss:3.3913 train_time:1549439ms step_avg:363.55ms step:4273/6000 train_loss:3.5979 train_time:1549694ms step_avg:363.52ms step:4274/6000 train_loss:3.5194 train_time:1549949ms step_avg:363.50ms step:4275/6000 train_loss:3.3834 train_time:1550203ms step_avg:363.47ms step:4276/6000 train_loss:3.4300 train_time:1550458ms step_avg:363.45ms step:4277/6000 train_loss:3.3621 train_time:1550712ms step_avg:363.42ms step:4278/6000 train_loss:3.3892 train_time:1550967ms step_avg:363.39ms step:4279/6000 train_loss:3.3917 train_time:1551222ms step_avg:363.37ms step:4280/6000 train_loss:3.4611 train_time:1551478ms step_avg:363.34ms step:4281/6000 train_loss:3.4478 train_time:1552805ms step_avg:363.57ms step:4282/6000 train_loss:3.4662 train_time:1553057ms step_avg:363.54ms step:4283/6000 train_loss:3.3941 train_time:1553312ms step_avg:363.52ms step:4284/6000 train_loss:3.4423 train_time:1553567ms step_avg:363.49ms step:4285/6000 train_loss:3.4997 train_time:1553821ms step_avg:363.47ms step:4286/6000 train_loss:3.4433 train_time:1554077ms step_avg:363.44ms step:4287/6000 train_loss:3.3555 train_time:1554331ms step_avg:363.42ms step:4288/6000 train_loss:3.3799 train_time:1554586ms step_avg:363.39ms step:4289/6000 train_loss:3.4739 train_time:1554841ms step_avg:363.37ms step:4290/6000 train_loss:3.4334 train_time:1555095ms step_avg:363.34ms step:4291/6000 train_loss:3.3320 train_time:1556422ms step_avg:363.57ms step:4292/6000 train_loss:3.3648 train_time:1556676ms step_avg:363.54ms step:4293/6000 train_loss:3.4271 train_time:1556930ms step_avg:363.51ms step:4294/6000 train_loss:3.2233 train_time:1557184ms step_avg:363.49ms step:4295/6000 train_loss:3.5646 train_time:1557439ms step_avg:363.46ms step:4296/6000 train_loss:3.4605 train_time:1557692ms step_avg:363.44ms step:4297/6000 train_loss:3.4036 train_time:1557947ms step_avg:363.41ms step:4298/6000 
train_loss:3.5797 train_time:1558201ms step_avg:363.39ms step:4299/6000 train_loss:3.4967 train_time:1558456ms step_avg:363.36ms step:4300/6000 train_loss:3.3286 train_time:1558711ms step_avg:363.34ms step:4301/6000 train_loss:3.3273 train_time:1560037ms step_avg:363.56ms step:4302/6000 train_loss:3.4699 train_time:1560289ms step_avg:363.53ms step:4303/6000 train_loss:3.3094 train_time:1560544ms step_avg:363.51ms step:4304/6000 train_loss:3.4647 train_time:1560798ms step_avg:363.48ms step:4305/6000 train_loss:3.5392 train_time:1561053ms step_avg:363.46ms step:4306/6000 train_loss:3.2971 train_time:1561307ms step_avg:363.43ms step:4307/6000 train_loss:3.8150 train_time:1561561ms step_avg:363.41ms step:4308/6000 train_loss:3.4309 train_time:1561816ms step_avg:363.38ms step:4309/6000 train_loss:3.3602 train_time:1562071ms step_avg:363.36ms step:4310/6000 train_loss:3.3580 train_time:1562326ms step_avg:363.33ms step:4311/6000 train_loss:3.6618 train_time:1563653ms step_avg:363.56ms step:4312/6000 train_loss:3.4963 train_time:1563905ms step_avg:363.53ms step:4313/6000 train_loss:3.3513 train_time:1564160ms step_avg:363.50ms step:4314/6000 train_loss:3.5515 train_time:1564415ms step_avg:363.48ms step:4315/6000 train_loss:3.4882 train_time:1564670ms step_avg:363.45ms step:4316/6000 train_loss:3.4026 train_time:1564924ms step_avg:363.43ms step:4317/6000 train_loss:3.4410 train_time:1565179ms step_avg:363.40ms step:4318/6000 train_loss:3.3972 train_time:1565434ms step_avg:363.38ms step:4319/6000 train_loss:3.5080 train_time:1565688ms step_avg:363.35ms step:4320/6000 train_loss:3.5622 train_time:1565944ms step_avg:363.33ms step:4321/6000 train_loss:3.3811 train_time:1567270ms step_avg:363.55ms step:4322/6000 train_loss:3.5475 train_time:1567521ms step_avg:363.53ms step:4323/6000 train_loss:3.4079 train_time:1567775ms step_avg:363.50ms step:4324/6000 train_loss:3.3351 train_time:1568030ms step_avg:363.47ms step:4325/6000 train_loss:3.2768 train_time:1568285ms 
step_avg:363.45ms step:4326/6000 train_loss:3.3760 train_time:1568539ms step_avg:363.42ms step:4327/6000 train_loss:3.2533 train_time:1568795ms step_avg:363.40ms step:4328/6000 train_loss:3.3806 train_time:1569050ms step_avg:363.37ms step:4329/6000 train_loss:3.4117 train_time:1569305ms step_avg:363.35ms step:4330/6000 train_loss:3.3532 train_time:1569561ms step_avg:363.32ms step:4331/6000 train_loss:3.6121 train_time:1570887ms step_avg:363.55ms step:4332/6000 train_loss:3.4137 train_time:1571138ms step_avg:363.52ms step:4333/6000 train_loss:3.5233 train_time:1571394ms step_avg:363.50ms step:4334/6000 train_loss:3.8855 train_time:1571649ms step_avg:363.47ms step:4335/6000 train_loss:3.4158 train_time:1571903ms step_avg:363.45ms step:4336/6000 train_loss:3.5261 train_time:1572158ms step_avg:363.42ms step:4337/6000 train_loss:3.4136 train_time:1572413ms step_avg:363.40ms step:4338/6000 train_loss:3.3089 train_time:1572668ms step_avg:363.37ms step:4339/6000 train_loss:3.4460 train_time:1572922ms step_avg:363.35ms step:4340/6000 train_loss:3.3367 train_time:1573178ms step_avg:363.32ms step:4341/6000 train_loss:3.4341 train_time:1574505ms step_avg:363.54ms step:4342/6000 train_loss:3.4577 train_time:1574757ms step_avg:363.52ms step:4343/6000 train_loss:3.4536 train_time:1575010ms step_avg:363.49ms step:4344/6000 train_loss:3.4412 train_time:1575265ms step_avg:363.47ms step:4345/6000 train_loss:4.0794 train_time:1575520ms step_avg:363.44ms step:4346/6000 train_loss:3.5239 train_time:1575776ms step_avg:363.42ms step:4347/6000 train_loss:3.3136 train_time:1576172ms step_avg:363.42ms step:4348/6000 train_loss:3.4535 train_time:1576427ms step_avg:363.40ms step:4349/6000 train_loss:3.4055 train_time:1576681ms step_avg:363.37ms step:4350/6000 train_loss:3.3148 train_time:1576937ms step_avg:363.35ms step:4351/6000 train_loss:3.4812 train_time:1578263ms step_avg:363.57ms step:4352/6000 train_loss:3.4381 train_time:1578515ms step_avg:363.55ms step:4353/6000 train_loss:3.4993 
train_time:1578769ms step_avg:363.52ms step:4354/6000 train_loss:3.5426 train_time:1579023ms step_avg:363.50ms step:4355/6000 train_loss:3.3713 train_time:1579278ms step_avg:363.47ms step:4356/6000 train_loss:3.3236 train_time:1579532ms step_avg:363.44ms step:4357/6000 train_loss:3.4533 train_time:1579787ms step_avg:363.42ms step:4358/6000 train_loss:3.3946 train_time:1580043ms step_avg:363.40ms step:4359/6000 train_loss:3.5818 train_time:1580298ms step_avg:363.37ms step:4360/6000 train_loss:3.4480 train_time:1580554ms step_avg:363.35ms step:4361/6000 train_loss:3.5305 train_time:1581880ms step_avg:363.57ms step:4362/6000 train_loss:3.6855 train_time:1582132ms step_avg:363.54ms step:4363/6000 train_loss:3.4781 train_time:1582386ms step_avg:363.52ms step:4364/6000 train_loss:3.4382 train_time:1582641ms step_avg:363.49ms step:4365/6000 train_loss:3.6459 train_time:1582894ms step_avg:363.47ms step:4366/6000 train_loss:3.5352 train_time:1583149ms step_avg:363.44ms step:4367/6000 train_loss:3.3559 train_time:1583402ms step_avg:363.42ms step:4368/6000 train_loss:3.3736 train_time:1583657ms step_avg:363.39ms step:4369/6000 train_loss:3.4720 train_time:1583914ms step_avg:363.37ms step:4370/6000 train_loss:3.4722 train_time:1584334ms step_avg:363.38ms step:4371/6000 train_loss:3.6156 train_time:1585660ms step_avg:363.60ms step:4372/6000 train_loss:3.3202 train_time:1585911ms step_avg:363.57ms step:4373/6000 train_loss:3.3037 train_time:1586165ms step_avg:363.55ms step:4374/6000 train_loss:3.4675 train_time:1586419ms step_avg:363.52ms step:4375/6000 train_loss:3.4862 train_time:1586673ms step_avg:363.50ms step:4375/6000 val_loss:3.4023 train_time:1586708ms step_avg:363.51ms step:4376/6000 train_loss:3.5504 train_time:1586931ms step_avg:363.47ms step:4377/6000 train_loss:3.3421 train_time:1587186ms step_avg:363.45ms step:4378/6000 train_loss:3.3952 train_time:1587441ms step_avg:363.43ms step:4379/6000 train_loss:3.4046 train_time:1587697ms step_avg:363.40ms step:4380/6000 
train_loss:3.4511 train_time:1587952ms step_avg:363.38ms step:4381/6000 train_loss:3.3504 train_time:1589279ms step_avg:363.60ms step:4382/6000 train_loss:3.5904 train_time:1589532ms step_avg:363.57ms step:4383/6000 train_loss:3.4986 train_time:1589786ms step_avg:363.55ms step:4384/6000 train_loss:3.4973 train_time:1590040ms step_avg:363.52ms step:4385/6000 train_loss:3.3732 train_time:1590296ms step_avg:363.50ms step:4386/6000 train_loss:3.5074 train_time:1590550ms step_avg:363.47ms step:4387/6000 train_loss:3.3959 train_time:1590805ms step_avg:363.45ms step:4388/6000 train_loss:3.5272 train_time:1591059ms step_avg:363.42ms step:4389/6000 train_loss:3.3571 train_time:1591315ms step_avg:363.40ms step:4390/6000 train_loss:3.4802 train_time:1591569ms step_avg:363.37ms step:4391/6000 train_loss:3.4881 train_time:1592895ms step_avg:363.59ms step:4392/6000 train_loss:3.3144 train_time:1593148ms step_avg:363.57ms step:4393/6000 train_loss:3.9930 train_time:1593403ms step_avg:363.54ms step:4394/6000 train_loss:3.3814 train_time:1593657ms step_avg:363.52ms step:4395/6000 train_loss:3.5725 train_time:1593912ms step_avg:363.49ms step:4396/6000 train_loss:3.3524 train_time:1594166ms step_avg:363.47ms step:4397/6000 train_loss:3.4804 train_time:1594421ms step_avg:363.44ms step:4398/6000 train_loss:3.2706 train_time:1594677ms step_avg:363.42ms step:4399/6000 train_loss:3.4962 train_time:1594932ms step_avg:363.39ms step:4400/6000 train_loss:3.3268 train_time:1595187ms step_avg:363.37ms step:4401/6000 train_loss:3.3914 train_time:1596513ms step_avg:363.59ms step:4402/6000 train_loss:3.4573 train_time:1596766ms step_avg:363.56ms step:4403/6000 train_loss:3.2881 train_time:1597021ms step_avg:363.54ms step:4404/6000 train_loss:3.3518 train_time:1597275ms step_avg:363.51ms step:4405/6000 train_loss:3.5459 train_time:1597530ms step_avg:363.49ms step:4406/6000 train_loss:3.4073 train_time:1597786ms step_avg:363.46ms step:4407/6000 train_loss:3.4227 train_time:1598040ms 
step_avg:363.44ms step:4408/6000 train_loss:3.3788 train_time:1598295ms step_avg:363.41ms step:4409/6000 train_loss:3.4652 train_time:1598550ms step_avg:363.39ms step:4410/6000 train_loss:3.4556 train_time:1598805ms step_avg:363.36ms step:4411/6000 train_loss:3.5708 train_time:1600131ms step_avg:363.58ms step:4412/6000 train_loss:3.4128 train_time:1600383ms step_avg:363.56ms step:4413/6000 train_loss:3.4482 train_time:1600638ms step_avg:363.53ms step:4414/6000 train_loss:3.4263 train_time:1600893ms step_avg:363.51ms step:4415/6000 train_loss:3.4804 train_time:1601148ms step_avg:363.48ms step:4416/6000 train_loss:3.4308 train_time:1601402ms step_avg:363.46ms step:4417/6000 train_loss:3.4902 train_time:1601657ms step_avg:363.43ms step:4418/6000 train_loss:3.4156 train_time:1601912ms step_avg:363.41ms step:4419/6000 train_loss:3.3320 train_time:1602166ms step_avg:363.39ms step:4420/6000 train_loss:3.3796 train_time:1602421ms step_avg:363.36ms step:4421/6000 train_loss:3.6116 train_time:1603748ms step_avg:363.58ms step:4422/6000 train_loss:3.4122 train_time:1604000ms step_avg:363.55ms step:4423/6000 train_loss:3.3339 train_time:1604255ms step_avg:363.53ms step:4424/6000 train_loss:3.3513 train_time:1604509ms step_avg:363.50ms step:4425/6000 train_loss:3.5276 train_time:1604763ms step_avg:363.48ms step:4426/6000 train_loss:3.4864 train_time:1605019ms step_avg:363.46ms step:4427/6000 train_loss:3.3975 train_time:1605274ms step_avg:363.43ms step:4428/6000 train_loss:3.6125 train_time:1605528ms step_avg:363.41ms step:4429/6000 train_loss:3.5145 train_time:1605783ms step_avg:363.38ms step:4430/6000 train_loss:3.3130 train_time:1606040ms step_avg:363.36ms step:4431/6000 train_loss:3.3125 train_time:1607367ms step_avg:363.58ms step:4432/6000 train_loss:3.4549 train_time:1607622ms step_avg:363.55ms step:4433/6000 train_loss:3.3270 train_time:1607878ms step_avg:363.53ms step:4434/6000 train_loss:3.4436 train_time:1608131ms step_avg:363.50ms step:4435/6000 train_loss:3.5019 
train_time:1608386ms step_avg:363.48ms step:4436/6000 train_loss:3.3978 train_time:1608640ms step_avg:363.45ms step:4437/6000 train_loss:3.3097 train_time:1608893ms step_avg:363.43ms step:4438/6000 train_loss:3.5548 train_time:1609148ms step_avg:363.40ms step:4439/6000 train_loss:3.4828 train_time:1609403ms step_avg:363.38ms step:4440/6000 train_loss:3.3815 train_time:1609658ms step_avg:363.35ms step:4441/6000 train_loss:3.4880 train_time:1610985ms step_avg:363.57ms step:4442/6000 train_loss:3.5132 train_time:1611236ms step_avg:363.55ms step:4443/6000 train_loss:3.5817 train_time:1611490ms step_avg:363.52ms step:4444/6000 train_loss:3.4530 train_time:1611744ms step_avg:363.50ms step:4445/6000 train_loss:3.2796 train_time:1612000ms step_avg:363.47ms step:4446/6000 train_loss:3.5486 train_time:1612254ms step_avg:363.45ms step:4447/6000 train_loss:3.4219 train_time:1612509ms step_avg:363.42ms step:4448/6000 train_loss:3.3196 train_time:1612764ms step_avg:363.40ms step:4449/6000 train_loss:3.4521 train_time:1613020ms step_avg:363.37ms step:4450/6000 train_loss:3.4376 train_time:1613275ms step_avg:363.35ms step:4451/6000 train_loss:3.4626 train_time:1614601ms step_avg:363.57ms step:4452/6000 train_loss:3.4877 train_time:1614853ms step_avg:363.54ms step:4453/6000 train_loss:3.3597 train_time:1615106ms step_avg:363.52ms step:4454/6000 train_loss:3.3930 train_time:1615360ms step_avg:363.49ms step:4455/6000 train_loss:3.3877 train_time:1615615ms step_avg:363.47ms step:4456/6000 train_loss:3.2716 train_time:1615869ms step_avg:363.44ms step:4457/6000 train_loss:3.4933 train_time:1616123ms step_avg:363.42ms step:4458/6000 train_loss:3.3629 train_time:1616378ms step_avg:363.39ms step:4459/6000 train_loss:3.3196 train_time:1616633ms step_avg:363.37ms step:4460/6000 train_loss:3.4457 train_time:1616888ms step_avg:363.35ms step:4461/6000 train_loss:3.9468 train_time:1618213ms step_avg:363.56ms step:4462/6000 train_loss:3.4391 train_time:1618465ms step_avg:363.54ms step:4463/6000 
train_loss:3.5595 train_time:1618720ms step_avg:363.51ms step:4464/6000 train_loss:3.4645 train_time:1618973ms step_avg:363.49ms step:4465/6000 train_loss:3.4333 train_time:1619229ms step_avg:363.46ms step:4466/6000 train_loss:3.5052 train_time:1619483ms step_avg:363.44ms step:4467/6000 train_loss:3.3201 train_time:1619737ms step_avg:363.41ms step:4468/6000 train_loss:3.3597 train_time:1619994ms step_avg:363.39ms step:4469/6000 train_loss:3.4864 train_time:1620249ms step_avg:363.37ms step:4470/6000 train_loss:3.4710 train_time:1620505ms step_avg:363.34ms step:4471/6000 train_loss:3.3981 train_time:1621831ms step_avg:363.56ms step:4472/6000 train_loss:3.3536 train_time:1622083ms step_avg:363.53ms step:4473/6000 train_loss:3.4237 train_time:1622337ms step_avg:363.51ms step:4474/6000 train_loss:3.2867 train_time:1622592ms step_avg:363.48ms step:4475/6000 train_loss:3.3564 train_time:1622845ms step_avg:363.46ms step:4476/6000 train_loss:3.3837 train_time:1623101ms step_avg:363.44ms step:4477/6000 train_loss:3.5471 train_time:1623355ms step_avg:363.41ms step:4478/6000 train_loss:3.2845 train_time:1623609ms step_avg:363.39ms step:4479/6000 train_loss:3.3984 train_time:1623863ms step_avg:363.36ms step:4480/6000 train_loss:3.4438 train_time:1624120ms step_avg:363.34ms step:4481/6000 train_loss:3.4166 train_time:1625446ms step_avg:363.55ms step:4482/6000 train_loss:3.4126 train_time:1625697ms step_avg:363.53ms step:4483/6000 train_loss:3.2215 train_time:1625950ms step_avg:363.50ms step:4484/6000 train_loss:3.3781 train_time:1626205ms step_avg:363.48ms step:4485/6000 train_loss:3.3211 train_time:1626459ms step_avg:363.45ms step:4486/6000 train_loss:3.4458 train_time:1626714ms step_avg:363.43ms step:4487/6000 train_loss:3.3331 train_time:1626970ms step_avg:363.41ms step:4488/6000 train_loss:3.4005 train_time:1627224ms step_avg:363.38ms step:4489/6000 train_loss:3.5364 train_time:1627480ms step_avg:363.36ms step:4490/6000 train_loss:3.5136 train_time:1627735ms 
step_avg:363.33ms step:4491/6000 train_loss:3.3785 train_time:1629062ms step_avg:363.55ms step:4492/6000 train_loss:3.3446 train_time:1629317ms step_avg:363.52ms step:4493/6000 train_loss:3.3915 train_time:1629572ms step_avg:363.50ms step:4494/6000 train_loss:3.4250 train_time:1629826ms step_avg:363.48ms step:4495/6000 train_loss:3.4147 train_time:1630080ms step_avg:363.45ms step:4496/6000 train_loss:3.3569 train_time:1630335ms step_avg:363.43ms step:4497/6000 train_loss:3.5181 train_time:1630590ms step_avg:363.40ms step:4498/6000 train_loss:3.4111 train_time:1630846ms step_avg:363.38ms step:4499/6000 train_loss:3.2421 train_time:1631100ms step_avg:363.35ms step:4500/6000 train_loss:3.5464 train_time:1631355ms step_avg:363.33ms step:4500/6000 val_loss:3.3934 train_time:1631390ms step_avg:363.34ms step:4501/6000 train_loss:3.3519 train_time:1632686ms step_avg:363.55ms step:4502/6000 train_loss:3.3039 train_time:1632939ms step_avg:363.52ms step:4503/6000 train_loss:3.4958 train_time:1633195ms step_avg:363.50ms step:4504/6000 train_loss:3.3772 train_time:1633450ms step_avg:363.47ms step:4505/6000 train_loss:3.4707 train_time:1633704ms step_avg:363.45ms step:4506/6000 train_loss:3.3854 train_time:1633958ms step_avg:363.42ms step:4507/6000 train_loss:3.4675 train_time:1634213ms step_avg:363.40ms step:4508/6000 train_loss:3.1948 train_time:1634467ms step_avg:363.38ms step:4509/6000 train_loss:3.4675 train_time:1634722ms step_avg:363.35ms step:4510/6000 train_loss:3.3060 train_time:1634977ms step_avg:363.33ms step:4511/6000 train_loss:3.3823 train_time:1636303ms step_avg:363.54ms step:4512/6000 train_loss:3.3306 train_time:1636556ms step_avg:363.52ms step:4513/6000 train_loss:3.3177 train_time:1636812ms step_avg:363.49ms step:4514/6000 train_loss:3.2785 train_time:1637066ms step_avg:363.47ms step:4515/6000 train_loss:3.4308 train_time:1637322ms step_avg:363.45ms step:4516/6000 train_loss:3.2784 train_time:1637578ms step_avg:363.42ms step:4517/6000 train_loss:3.3788 
train_time:1637833ms step_avg:363.40ms step:4518/6000 train_loss:3.3867 train_time:1638088ms step_avg:363.37ms step:4519/6000 train_loss:3.3998 train_time:1638342ms step_avg:363.35ms step:4520/6000 train_loss:3.3166 train_time:1638598ms step_avg:363.33ms step:4521/6000 train_loss:3.4996 train_time:1639923ms step_avg:363.54ms step:4522/6000 train_loss:3.5726 train_time:1640174ms step_avg:363.51ms step:4523/6000 train_loss:3.9119 train_time:1640428ms step_avg:363.49ms step:4524/6000 train_loss:3.6419 train_time:1640682ms step_avg:363.47ms step:4525/6000 train_loss:3.4039 train_time:1640937ms step_avg:363.44ms step:4526/6000 train_loss:3.3719 train_time:1641191ms step_avg:363.42ms step:4527/6000 train_loss:3.4371 train_time:1641446ms step_avg:363.39ms step:4528/6000 train_loss:3.3911 train_time:1641701ms step_avg:363.37ms step:4529/6000 train_loss:3.3082 train_time:1641955ms step_avg:363.34ms step:4530/6000 train_loss:4.0017 train_time:1642209ms step_avg:363.32ms step:4531/6000 train_loss:3.4670 train_time:1643537ms step_avg:363.53ms step:4532/6000 train_loss:3.2040 train_time:1643789ms step_avg:363.51ms step:4533/6000 train_loss:3.3076 train_time:1644043ms step_avg:363.49ms step:4534/6000 train_loss:3.4224 train_time:1644299ms step_avg:363.46ms step:4535/6000 train_loss:3.6392 train_time:1644554ms step_avg:363.44ms step:4536/6000 train_loss:3.6431 train_time:1644951ms step_avg:363.44ms step:4537/6000 train_loss:3.3837 train_time:1645208ms step_avg:363.42ms step:4538/6000 train_loss:3.3635 train_time:1645462ms step_avg:363.40ms step:4539/6000 train_loss:3.4037 train_time:1645719ms step_avg:363.37ms step:4540/6000 train_loss:3.9811 train_time:1645974ms step_avg:363.35ms step:4541/6000 train_loss:3.4638 train_time:1647300ms step_avg:363.56ms step:4542/6000 train_loss:3.3715 train_time:1647552ms step_avg:363.54ms step:4543/6000 train_loss:3.5326 train_time:1647807ms step_avg:363.51ms step:4544/6000 train_loss:3.3265 train_time:1648061ms step_avg:363.49ms step:4545/6000 
train_loss:3.4249 train_time:1648316ms step_avg:363.47ms step:4546/6000 train_loss:3.6039 train_time:1648571ms step_avg:363.44ms step:4547/6000 train_loss:3.4593 train_time:1648825ms step_avg:363.42ms step:4548/6000 train_loss:3.4157 train_time:1649080ms step_avg:363.39ms step:4549/6000 train_loss:3.3979 train_time:1649335ms step_avg:363.37ms step:4550/6000 train_loss:3.3396 train_time:1649590ms step_avg:363.35ms step:4551/6000 train_loss:3.3371 train_time:1650916ms step_avg:363.56ms step:4552/6000 train_loss:3.3042 train_time:1651168ms step_avg:363.53ms step:4553/6000 train_loss:3.4138 train_time:1651423ms step_avg:363.51ms step:4554/6000 train_loss:3.6171 train_time:1651678ms step_avg:363.49ms step:4555/6000 train_loss:3.4917 train_time:1651931ms step_avg:363.46ms step:4556/6000 train_loss:3.2427 train_time:1652186ms step_avg:363.44ms step:4557/6000 train_loss:3.4478 train_time:1652441ms step_avg:363.41ms step:4558/6000 train_loss:3.4633 train_time:1652697ms step_avg:363.39ms step:4559/6000 train_loss:3.4447 train_time:1652951ms step_avg:363.37ms step:4560/6000 train_loss:3.5469 train_time:1653374ms step_avg:363.38ms step:4561/6000 train_loss:3.3814 train_time:1654699ms step_avg:363.59ms step:4562/6000 train_loss:3.3847 train_time:1654952ms step_avg:363.57ms step:4563/6000 train_loss:3.4092 train_time:1655206ms step_avg:363.54ms step:4564/6000 train_loss:3.4433 train_time:1655460ms step_avg:363.52ms step:4565/6000 train_loss:3.5256 train_time:1655714ms step_avg:363.49ms step:4566/6000 train_loss:3.5944 train_time:1655968ms step_avg:363.47ms step:4567/6000 train_loss:3.4489 train_time:1656222ms step_avg:363.45ms step:4568/6000 train_loss:3.3119 train_time:1656478ms step_avg:363.42ms step:4569/6000 train_loss:3.4229 train_time:1656731ms step_avg:363.40ms step:4570/6000 train_loss:3.2990 train_time:1656986ms step_avg:363.37ms step:4571/6000 train_loss:3.3287 train_time:1658311ms step_avg:363.59ms step:4572/6000 train_loss:3.5330 train_time:1658565ms 
step_avg:363.56ms step:4573/6000 train_loss:3.2492 train_time:1658821ms step_avg:363.54ms step:4574/6000 train_loss:3.3142 train_time:1659074ms step_avg:363.51ms step:4575/6000 train_loss:3.4475 train_time:1659328ms step_avg:363.49ms step:4576/6000 train_loss:3.4757 train_time:1659582ms step_avg:363.47ms step:4577/6000 train_loss:3.4412 train_time:1659837ms step_avg:363.44ms step:4578/6000 train_loss:3.3975 train_time:1660092ms step_avg:363.42ms step:4579/6000 train_loss:3.4147 train_time:1660346ms step_avg:363.39ms step:4580/6000 train_loss:3.5205 train_time:1660601ms step_avg:363.37ms step:4581/6000 train_loss:3.3581 train_time:1661928ms step_avg:363.58ms step:4582/6000 train_loss:3.3733 train_time:1662180ms step_avg:363.56ms step:4583/6000 train_loss:3.4924 train_time:1662434ms step_avg:363.53ms step:4584/6000 train_loss:3.3329 train_time:1662690ms step_avg:363.51ms step:4585/6000 train_loss:3.4519 train_time:1662944ms step_avg:363.48ms step:4586/6000 train_loss:3.4221 train_time:1663199ms step_avg:363.46ms step:4587/6000 train_loss:3.4019 train_time:1663454ms step_avg:363.44ms step:4588/6000 train_loss:3.2654 train_time:1663707ms step_avg:363.41ms step:4589/6000 train_loss:3.3857 train_time:1663962ms step_avg:363.39ms step:4590/6000 train_loss:3.5795 train_time:1664217ms step_avg:363.37ms step:4591/6000 train_loss:3.4078 train_time:1665543ms step_avg:363.58ms step:4592/6000 train_loss:3.4001 train_time:1665795ms step_avg:363.55ms step:4593/6000 train_loss:3.3559 train_time:1666049ms step_avg:363.53ms step:4594/6000 train_loss:3.5254 train_time:1666304ms step_avg:363.50ms step:4595/6000 train_loss:3.4105 train_time:1666559ms step_avg:363.48ms step:4596/6000 train_loss:3.3103 train_time:1666813ms step_avg:363.46ms step:4597/6000 train_loss:3.2968 train_time:1667067ms step_avg:363.43ms step:4598/6000 train_loss:3.4805 train_time:1667323ms step_avg:363.41ms step:4599/6000 train_loss:3.4037 train_time:1667578ms step_avg:363.39ms step:4600/6000 train_loss:3.5253 
train_time:1667833ms step_avg:363.36ms step:4601/6000 train_loss:3.4266 train_time:1669161ms step_avg:363.57ms step:4602/6000 train_loss:3.2455 train_time:1669413ms step_avg:363.55ms step:4603/6000 train_loss:3.3511 train_time:1669667ms step_avg:363.52ms step:4604/6000 train_loss:3.4302 train_time:1669921ms step_avg:363.50ms step:4605/6000 train_loss:3.4481 train_time:1670176ms step_avg:363.48ms step:4606/6000 train_loss:3.3700 train_time:1670431ms step_avg:363.45ms step:4607/6000 train_loss:3.4887 train_time:1670685ms step_avg:363.43ms step:4608/6000 train_loss:3.3532 train_time:1670940ms step_avg:363.41ms step:4609/6000 train_loss:3.4681 train_time:1671195ms step_avg:363.38ms step:4610/6000 train_loss:3.4086 train_time:1671450ms step_avg:363.36ms step:4611/6000 train_loss:3.4464 train_time:1672777ms step_avg:363.57ms step:4612/6000 train_loss:3.5859 train_time:1673028ms step_avg:363.54ms step:4613/6000 train_loss:3.2927 train_time:1673282ms step_avg:363.52ms step:4614/6000 train_loss:3.1393 train_time:1673537ms step_avg:363.50ms step:4615/6000 train_loss:3.3405 train_time:1673792ms step_avg:363.47ms step:4616/6000 train_loss:3.2834 train_time:1674046ms step_avg:363.45ms step:4617/6000 train_loss:3.3774 train_time:1674302ms step_avg:363.43ms step:4618/6000 train_loss:3.2483 train_time:1674556ms step_avg:363.40ms step:4619/6000 train_loss:3.4602 train_time:1674811ms step_avg:363.38ms step:4620/6000 train_loss:3.5027 train_time:1675065ms step_avg:363.35ms step:4621/6000 train_loss:3.5425 train_time:1676391ms step_avg:363.56ms step:4622/6000 train_loss:3.3283 train_time:1676643ms step_avg:363.54ms step:4623/6000 train_loss:3.3359 train_time:1676898ms step_avg:363.52ms step:4624/6000 train_loss:3.3732 train_time:1677152ms step_avg:363.49ms step:4625/6000 train_loss:3.2844 train_time:1677406ms step_avg:363.47ms step:4625/6000 val_loss:3.3807 train_time:1677441ms step_avg:363.48ms step:4626/6000 train_loss:3.4595 train_time:1677664ms step_avg:363.45ms step:4627/6000 
train_loss:3.3323 train_time:1677920ms step_avg:363.42ms step:4628/6000 train_loss:3.3982 train_time:1678175ms step_avg:363.40ms step:4629/6000 train_loss:3.6040 train_time:1678430ms step_avg:363.38ms step:4630/6000 train_loss:3.4323 train_time:1678684ms step_avg:363.35ms step:4631/6000 train_loss:3.5259 train_time:1680012ms step_avg:363.56ms step:4632/6000 train_loss:3.3354 train_time:1680263ms step_avg:363.54ms step:4633/6000 train_loss:3.5211 train_time:1680519ms step_avg:363.51ms step:4634/6000 train_loss:3.3740 train_time:1680773ms step_avg:363.49ms step:4635/6000 train_loss:3.4370 train_time:1681028ms step_avg:363.47ms step:4636/6000 train_loss:3.4408 train_time:1681283ms step_avg:363.44ms step:4637/6000 train_loss:3.2724 train_time:1681537ms step_avg:363.42ms step:4638/6000 train_loss:3.4478 train_time:1681791ms step_avg:363.39ms step:4639/6000 train_loss:3.3958 train_time:1682049ms step_avg:363.37ms step:4640/6000 train_loss:3.4013 train_time:1682303ms step_avg:363.35ms step:4641/6000 train_loss:3.3325 train_time:1683630ms step_avg:363.56ms step:4642/6000 train_loss:3.3752 train_time:1683882ms step_avg:363.53ms step:4643/6000 train_loss:3.3836 train_time:1684136ms step_avg:363.51ms step:4644/6000 train_loss:3.6191 train_time:1684390ms step_avg:363.49ms step:4645/6000 train_loss:3.4730 train_time:1684646ms step_avg:363.46ms step:4646/6000 train_loss:3.5159 train_time:1684901ms step_avg:363.44ms step:4647/6000 train_loss:3.3457 train_time:1685158ms step_avg:363.42ms step:4648/6000 train_loss:3.4480 train_time:1685413ms step_avg:363.39ms step:4649/6000 train_loss:3.3906 train_time:1685667ms step_avg:363.37ms step:4650/6000 train_loss:3.4418 train_time:1685922ms step_avg:363.35ms step:4651/6000 train_loss:3.5713 train_time:1687248ms step_avg:363.55ms step:4652/6000 train_loss:3.3866 train_time:1687503ms step_avg:363.53ms step:4653/6000 train_loss:3.4988 train_time:1687757ms step_avg:363.51ms step:4654/6000 train_loss:3.3421 train_time:1688012ms 
step_avg:363.48ms step:4655/6000 train_loss:3.3864 train_time:1688267ms step_avg:363.46ms step:4656/6000 train_loss:3.4083 train_time:1688521ms step_avg:363.44ms step:4657/6000 train_loss:3.3593 train_time:1688776ms step_avg:363.41ms step:4658/6000 train_loss:3.2842 train_time:1689030ms step_avg:363.39ms step:4659/6000 train_loss:3.3291 train_time:1689284ms step_avg:363.36ms step:4660/6000 train_loss:3.2596 train_time:1689540ms step_avg:363.34ms step:4661/6000 train_loss:3.4294 train_time:1690865ms step_avg:363.55ms step:4662/6000 train_loss:3.4198 train_time:1691119ms step_avg:363.53ms step:4663/6000 train_loss:3.3685 train_time:1691373ms step_avg:363.50ms step:4664/6000 train_loss:3.3021 train_time:1691627ms step_avg:363.48ms step:4665/6000 train_loss:3.3141 train_time:1691883ms step_avg:363.46ms step:4666/6000 train_loss:3.3590 train_time:1692139ms step_avg:363.43ms step:4667/6000 train_loss:3.4603 train_time:1692394ms step_avg:363.41ms step:4668/6000 train_loss:3.3663 train_time:1692647ms step_avg:363.39ms step:4669/6000 train_loss:3.3411 train_time:1692903ms step_avg:363.36ms step:4670/6000 train_loss:3.4005 train_time:1693159ms step_avg:363.34ms step:4671/6000 train_loss:3.4901 train_time:1694485ms step_avg:363.55ms step:4672/6000 train_loss:3.3814 train_time:1694738ms step_avg:363.52ms step:4673/6000 train_loss:3.4268 train_time:1694991ms step_avg:363.50ms step:4674/6000 train_loss:3.3670 train_time:1695246ms step_avg:363.47ms step:4675/6000 train_loss:3.4028 train_time:1695501ms step_avg:363.45ms step:4676/6000 train_loss:3.4883 train_time:1695755ms step_avg:363.43ms step:4677/6000 train_loss:3.2148 train_time:1696009ms step_avg:363.40ms step:4678/6000 train_loss:3.2620 train_time:1696264ms step_avg:363.38ms step:4679/6000 train_loss:3.3987 train_time:1696520ms step_avg:363.36ms step:4680/6000 train_loss:3.3702 train_time:1696775ms step_avg:363.34ms step:4681/6000 train_loss:3.3786 train_time:1698101ms step_avg:363.54ms step:4682/6000 train_loss:3.3747 
train_time:1698354ms step_avg:363.52ms step:4683/6000 train_loss:3.3057 train_time:1698608ms step_avg:363.49ms step:4684/6000 train_loss:3.2815 train_time:1698867ms step_avg:363.47ms step:4685/6000 train_loss:3.5314 train_time:1699122ms step_avg:363.45ms step:4686/6000 train_loss:3.6182 train_time:1699376ms step_avg:363.43ms step:4687/6000 train_loss:3.3102 train_time:1699634ms step_avg:363.40ms step:4688/6000 train_loss:3.3136 train_time:1699888ms step_avg:363.38ms step:4689/6000 train_loss:3.5042 train_time:1700143ms step_avg:363.36ms step:4690/6000 train_loss:3.3244 train_time:1700397ms step_avg:363.33ms step:4691/6000 train_loss:3.1956 train_time:1701723ms step_avg:363.54ms step:4692/6000 train_loss:3.2888 train_time:1701976ms step_avg:363.51ms step:4693/6000 train_loss:3.2932 train_time:1702231ms step_avg:363.49ms step:4694/6000 train_loss:3.3207 train_time:1702485ms step_avg:363.47ms step:4695/6000 train_loss:3.3284 train_time:1702740ms step_avg:363.45ms step:4696/6000 train_loss:3.3595 train_time:1702994ms step_avg:363.42ms step:4697/6000 train_loss:3.4197 train_time:1703248ms step_avg:363.40ms step:4698/6000 train_loss:3.3299 train_time:1703502ms step_avg:363.38ms step:4699/6000 train_loss:3.3580 train_time:1703758ms step_avg:363.35ms step:4700/6000 train_loss:3.4545 train_time:1704012ms step_avg:363.33ms step:4701/6000 train_loss:3.3696 train_time:1705339ms step_avg:363.53ms step:4702/6000 train_loss:3.3605 train_time:1705592ms step_avg:363.51ms step:4703/6000 train_loss:3.3157 train_time:1705846ms step_avg:363.49ms step:4704/6000 train_loss:3.4029 train_time:1706101ms step_avg:363.46ms step:4705/6000 train_loss:3.3555 train_time:1706355ms step_avg:363.44ms step:4706/6000 train_loss:3.2821 train_time:1706610ms step_avg:363.42ms step:4707/6000 train_loss:3.4178 train_time:1706864ms step_avg:363.39ms step:4708/6000 train_loss:3.5013 train_time:1707119ms step_avg:363.37ms step:4709/6000 train_loss:3.3127 train_time:1707373ms step_avg:363.35ms step:4710/6000 
train_loss:3.2927 train_time:1707628ms step_avg:363.33ms step:4711/6000 train_loss:3.3154 train_time:1708955ms step_avg:363.53ms step:4712/6000 train_loss:3.3355 train_time:1709207ms step_avg:363.51ms step:4713/6000 train_loss:3.4513 train_time:1709462ms step_avg:363.48ms step:4714/6000 train_loss:3.2955 train_time:1709716ms step_avg:363.46ms step:4715/6000 train_loss:3.3651 train_time:1709971ms step_avg:363.44ms step:4716/6000 train_loss:3.2991 train_time:1710225ms step_avg:363.41ms step:4717/6000 train_loss:3.3593 train_time:1710480ms step_avg:363.39ms step:4718/6000 train_loss:3.2910 train_time:1710735ms step_avg:363.37ms step:4719/6000 train_loss:3.2542 train_time:1710989ms step_avg:363.34ms step:4720/6000 train_loss:3.4233 train_time:1711244ms step_avg:363.32ms step:4721/6000 train_loss:3.4150 train_time:1712570ms step_avg:363.53ms step:4722/6000 train_loss:3.4092 train_time:1712823ms step_avg:363.50ms step:4723/6000 train_loss:3.2618 train_time:1713076ms step_avg:363.48ms step:4724/6000 train_loss:3.4437 train_time:1713331ms step_avg:363.46ms step:4725/6000 train_loss:3.3256 train_time:1713727ms step_avg:363.46ms step:4726/6000 train_loss:3.6278 train_time:1713982ms step_avg:363.44ms step:4727/6000 train_loss:3.4578 train_time:1714237ms step_avg:363.42ms step:4728/6000 train_loss:3.3363 train_time:1714492ms step_avg:363.39ms step:4729/6000 train_loss:3.2614 train_time:1714747ms step_avg:363.37ms step:4730/6000 train_loss:3.2277 train_time:1715002ms step_avg:363.35ms step:4731/6000 train_loss:3.3123 train_time:1716328ms step_avg:363.55ms step:4732/6000 train_loss:3.3867 train_time:1716579ms step_avg:363.53ms step:4733/6000 train_loss:3.2853 train_time:1716833ms step_avg:363.50ms step:4734/6000 train_loss:3.1810 train_time:1717087ms step_avg:363.48ms step:4735/6000 train_loss:3.4511 train_time:1717342ms step_avg:363.46ms step:4736/6000 train_loss:3.3342 train_time:1717596ms step_avg:363.44ms step:4737/6000 train_loss:3.5059 train_time:1717850ms 
step_avg:363.41ms step:4738/6000 train_loss:3.4276 train_time:1718104ms step_avg:363.39ms step:4739/6000 train_loss:3.3789 train_time:1718360ms step_avg:363.37ms step:4740/6000 train_loss:3.3479 train_time:1718615ms step_avg:363.34ms step:4741/6000 train_loss:3.3621 train_time:1719941ms step_avg:363.55ms step:4742/6000 train_loss:3.3490 train_time:1720194ms step_avg:363.52ms step:4743/6000 train_loss:3.2353 train_time:1720448ms step_avg:363.50ms step:4744/6000 train_loss:3.3677 train_time:1720701ms step_avg:363.48ms step:4745/6000 train_loss:3.3347 train_time:1720956ms step_avg:363.45ms step:4746/6000 train_loss:3.3321 train_time:1721210ms step_avg:363.43ms step:4747/6000 train_loss:3.3151 train_time:1721465ms step_avg:363.41ms step:4748/6000 train_loss:3.4995 train_time:1721720ms step_avg:363.39ms step:4749/6000 train_loss:3.3527 train_time:1721975ms step_avg:363.36ms step:4750/6000 train_loss:3.4471 train_time:1722393ms step_avg:363.37ms step:4750/6000 val_loss:3.3682 train_time:1722427ms step_avg:363.38ms step:4751/6000 train_loss:3.2586 train_time:1723723ms step_avg:363.58ms step:4752/6000 train_loss:3.1806 train_time:1723976ms step_avg:363.55ms step:4753/6000 train_loss:3.2639 train_time:1724230ms step_avg:363.53ms step:4754/6000 train_loss:3.4626 train_time:1724486ms step_avg:363.51ms step:4755/6000 train_loss:3.3505 train_time:1724741ms step_avg:363.49ms step:4756/6000 train_loss:3.5868 train_time:1724995ms step_avg:363.46ms step:4757/6000 train_loss:3.4537 train_time:1725250ms step_avg:363.44ms step:4758/6000 train_loss:3.3607 train_time:1725504ms step_avg:363.42ms step:4759/6000 train_loss:3.4151 train_time:1725760ms step_avg:363.39ms step:4760/6000 train_loss:3.4068 train_time:1726014ms step_avg:363.37ms step:4761/6000 train_loss:3.3369 train_time:1727342ms step_avg:363.57ms step:4762/6000 train_loss:3.3841 train_time:1727595ms step_avg:363.55ms step:4763/6000 train_loss:3.3532 train_time:1727849ms step_avg:363.53ms step:4764/6000 train_loss:3.2116 
train_time:1728104ms step_avg:363.51ms step:4765/6000 train_loss:3.2236 train_time:1728360ms step_avg:363.48ms step:4766/6000 train_loss:3.2209 train_time:1728614ms step_avg:363.46ms step:4767/6000 train_loss:3.4591 train_time:1728871ms step_avg:363.44ms step:4768/6000 train_loss:3.7278 train_time:1729125ms step_avg:363.41ms step:4769/6000 train_loss:3.4317 train_time:1729380ms step_avg:363.39ms step:4770/6000 train_loss:3.3400 train_time:1729635ms step_avg:363.37ms step:4771/6000 train_loss:3.4026 train_time:1730960ms step_avg:363.57ms step:4772/6000 train_loss:3.3566 train_time:1731213ms step_avg:363.55ms step:4773/6000 train_loss:3.3375 train_time:1731467ms step_avg:363.52ms step:4774/6000 train_loss:3.5257 train_time:1731721ms step_avg:363.50ms step:4775/6000 train_loss:3.3392 train_time:1731976ms step_avg:363.48ms step:4776/6000 train_loss:3.4775 train_time:1732230ms step_avg:363.46ms step:4777/6000 train_loss:3.3921 train_time:1732485ms step_avg:363.43ms step:4778/6000 train_loss:3.2242 train_time:1732740ms step_avg:363.41ms step:4779/6000 train_loss:3.4035 train_time:1732996ms step_avg:363.39ms step:4780/6000 train_loss:3.3342 train_time:1733250ms step_avg:363.36ms step:4781/6000 train_loss:3.4120 train_time:1734578ms step_avg:363.57ms step:4782/6000 train_loss:3.3256 train_time:1734830ms step_avg:363.54ms step:4783/6000 train_loss:3.2813 train_time:1735084ms step_avg:363.52ms step:4784/6000 train_loss:3.3305 train_time:1735338ms step_avg:363.50ms step:4785/6000 train_loss:3.2641 train_time:1735592ms step_avg:363.47ms step:4786/6000 train_loss:3.5914 train_time:1735847ms step_avg:363.45ms step:4787/6000 train_loss:3.4742 train_time:1736106ms step_avg:363.43ms step:4788/6000 train_loss:3.3995 train_time:1736360ms step_avg:363.41ms step:4789/6000 train_loss:3.3803 train_time:1736613ms step_avg:363.38ms step:4790/6000 train_loss:3.3019 train_time:1736868ms step_avg:363.36ms step:4791/6000 train_loss:3.4068 train_time:1738195ms step_avg:363.56ms step:4792/6000 
train_loss:3.4216 train_time:1738447ms step_avg:363.54ms step:4793/6000 train_loss:3.3344 train_time:1738703ms step_avg:363.52ms step:4794/6000 train_loss:3.4053 train_time:1738959ms step_avg:363.49ms step:4795/6000 train_loss:3.2479 train_time:1739214ms step_avg:363.47ms step:4796/6000 train_loss:3.4057 train_time:1739468ms step_avg:363.45ms step:4797/6000 train_loss:3.4722 train_time:1739722ms step_avg:363.43ms step:4798/6000 train_loss:3.1410 train_time:1739977ms step_avg:363.40ms step:4799/6000 train_loss:3.3167 train_time:1740232ms step_avg:363.38ms step:4800/6000 train_loss:3.2966 train_time:1740487ms step_avg:363.36ms step:4801/6000 train_loss:3.3930 train_time:1741813ms step_avg:363.56ms step:4802/6000 train_loss:3.2215 train_time:1742065ms step_avg:363.54ms step:4803/6000 train_loss:3.2564 train_time:1742319ms step_avg:363.51ms step:4804/6000 train_loss:3.4526 train_time:1742573ms step_avg:363.49ms step:4805/6000 train_loss:3.3924 train_time:1742827ms step_avg:363.47ms step:4806/6000 train_loss:3.4592 train_time:1743084ms step_avg:363.45ms step:4807/6000 train_loss:3.4835 train_time:1743338ms step_avg:363.42ms step:4808/6000 train_loss:3.2463 train_time:1743593ms step_avg:363.40ms step:4809/6000 train_loss:3.3683 train_time:1743847ms step_avg:363.38ms step:4810/6000 train_loss:3.3057 train_time:1744103ms step_avg:363.35ms step:4811/6000 train_loss:3.5417 train_time:1745431ms step_avg:363.56ms step:4812/6000 train_loss:3.3442 train_time:1745683ms step_avg:363.53ms step:4813/6000 train_loss:3.3879 train_time:1745937ms step_avg:363.51ms step:4814/6000 train_loss:3.2710 train_time:1746191ms step_avg:363.49ms step:4815/6000 train_loss:3.3245 train_time:1746446ms step_avg:363.46ms step:4816/6000 train_loss:3.7456 train_time:1746701ms step_avg:363.44ms step:4817/6000 train_loss:3.4290 train_time:1746955ms step_avg:363.42ms step:4818/6000 train_loss:3.3727 train_time:1747210ms step_avg:363.40ms step:4819/6000 train_loss:3.2158 train_time:1747465ms 
step_avg:363.37ms step:4820/6000 train_loss:3.3578 train_time:1747720ms step_avg:363.35ms step:4821/6000 train_loss:3.3607 train_time:1749046ms step_avg:363.55ms step:4822/6000 train_loss:3.4165 train_time:1749298ms step_avg:363.53ms step:4823/6000 train_loss:3.4600 train_time:1749552ms step_avg:363.51ms step:4824/6000 train_loss:3.3423 train_time:1749806ms step_avg:363.48ms step:4825/6000 train_loss:3.3345 train_time:1750062ms step_avg:363.46ms step:4826/6000 train_loss:3.2539 train_time:1750316ms step_avg:363.44ms step:4827/6000 train_loss:3.2224 train_time:1750570ms step_avg:363.41ms step:4828/6000 train_loss:3.4038 train_time:1750824ms step_avg:363.39ms step:4829/6000 train_loss:3.2870 train_time:1751079ms step_avg:363.37ms step:4830/6000 train_loss:3.4074 train_time:1751333ms step_avg:363.35ms step:4831/6000 train_loss:3.5693 train_time:1752664ms step_avg:363.55ms step:4832/6000 train_loss:3.3052 train_time:1752915ms step_avg:363.52ms step:4833/6000 train_loss:3.3869 train_time:1753170ms step_avg:363.50ms step:4834/6000 train_loss:3.3493 train_time:1753424ms step_avg:363.48ms step:4835/6000 train_loss:3.5330 train_time:1753679ms step_avg:363.46ms step:4836/6000 train_loss:3.3500 train_time:1753934ms step_avg:363.43ms step:4837/6000 train_loss:3.6026 train_time:1754189ms step_avg:363.41ms step:4838/6000 train_loss:3.5603 train_time:1754444ms step_avg:363.39ms step:4839/6000 train_loss:3.3881 train_time:1754700ms step_avg:363.37ms step:4840/6000 train_loss:3.3857 train_time:1754954ms step_avg:363.34ms step:4841/6000 train_loss:3.3685 train_time:1756281ms step_avg:363.54ms step:4842/6000 train_loss:3.4091 train_time:1756532ms step_avg:363.52ms step:4843/6000 train_loss:3.4040 train_time:1756787ms step_avg:363.50ms step:4844/6000 train_loss:3.2568 train_time:1757041ms step_avg:363.48ms step:4845/6000 train_loss:3.2862 train_time:1757295ms step_avg:363.45ms step:4846/6000 train_loss:3.2707 train_time:1757549ms step_avg:363.43ms step:4847/6000 train_loss:3.4246 
train_time:1757805ms step_avg:363.41ms step:4848/6000 train_loss:3.2980 train_time:1758060ms step_avg:363.39ms step:4849/6000 train_loss:3.3294 train_time:1758313ms step_avg:363.36ms step:4850/6000 train_loss:3.4695 train_time:1758569ms step_avg:363.34ms step:4851/6000 train_loss:3.3536 train_time:1759895ms step_avg:363.54ms step:4852/6000 train_loss:3.1602 train_time:1760148ms step_avg:363.52ms step:4853/6000 train_loss:3.2513 train_time:1760404ms step_avg:363.49ms step:4854/6000 train_loss:3.3915 train_time:1760659ms step_avg:363.47ms step:4855/6000 train_loss:3.3406 train_time:1760913ms step_avg:363.45ms step:4856/6000 train_loss:3.4813 train_time:1761168ms step_avg:363.43ms step:4857/6000 train_loss:3.3197 train_time:1761423ms step_avg:363.40ms step:4858/6000 train_loss:3.3520 train_time:1761678ms step_avg:363.38ms step:4859/6000 train_loss:3.3067 train_time:1761933ms step_avg:363.36ms step:4860/6000 train_loss:3.4291 train_time:1762188ms step_avg:363.34ms step:4861/6000 train_loss:3.3025 train_time:1763514ms step_avg:363.54ms step:4862/6000 train_loss:3.3576 train_time:1763765ms step_avg:363.51ms step:4863/6000 train_loss:3.3638 train_time:1764020ms step_avg:363.49ms step:4864/6000 train_loss:3.3227 train_time:1764275ms step_avg:363.47ms step:4865/6000 train_loss:3.4070 train_time:1764528ms step_avg:363.45ms step:4866/6000 train_loss:3.0525 train_time:1764784ms step_avg:363.42ms step:4867/6000 train_loss:3.2718 train_time:1765038ms step_avg:363.40ms step:4868/6000 train_loss:3.3240 train_time:1765293ms step_avg:363.38ms step:4869/6000 train_loss:3.3432 train_time:1765548ms step_avg:363.36ms step:4870/6000 train_loss:3.3441 train_time:1765804ms step_avg:363.33ms step:4871/6000 train_loss:3.3560 train_time:1767131ms step_avg:363.53ms step:4872/6000 train_loss:3.4647 train_time:1767384ms step_avg:363.51ms step:4873/6000 train_loss:3.4585 train_time:1767638ms step_avg:363.49ms step:4874/6000 train_loss:3.4832 train_time:1767893ms step_avg:363.46ms step:4875/6000 
train_loss:3.5592 train_time:1768147ms step_avg:363.44ms step:4875/6000 val_loss:3.3561 train_time:1768181ms step_avg:363.45ms step:4876/6000 train_loss:3.3785 train_time:1768405ms step_avg:363.42ms step:4877/6000 train_loss:3.2882 train_time:1768660ms step_avg:363.40ms step:4878/6000 train_loss:3.2559 train_time:1768916ms step_avg:363.38ms step:4879/6000 train_loss:3.3020 train_time:1769171ms step_avg:363.35ms step:4880/6000 train_loss:3.4381 train_time:1769428ms step_avg:363.33ms step:4881/6000 train_loss:3.2829 train_time:1770754ms step_avg:363.53ms step:4882/6000 train_loss:3.4090 train_time:1771007ms step_avg:363.51ms step:4883/6000 train_loss:3.4193 train_time:1771262ms step_avg:363.48ms step:4884/6000 train_loss:3.3220 train_time:1771517ms step_avg:363.46ms step:4885/6000 train_loss:3.3217 train_time:1771772ms step_avg:363.44ms step:4886/6000 train_loss:3.4304 train_time:1772028ms step_avg:363.42ms step:4887/6000 train_loss:3.4504 train_time:1772283ms step_avg:363.40ms step:4888/6000 train_loss:3.3347 train_time:1772538ms step_avg:363.37ms step:4889/6000 train_loss:3.2882 train_time:1772791ms step_avg:363.35ms step:4890/6000 train_loss:3.3864 train_time:1773047ms step_avg:363.33ms step:4891/6000 train_loss:3.2958 train_time:1774373ms step_avg:363.53ms step:4892/6000 train_loss:3.3905 train_time:1774625ms step_avg:363.50ms step:4893/6000 train_loss:3.3789 train_time:1774879ms step_avg:363.48ms step:4894/6000 train_loss:3.4140 train_time:1775135ms step_avg:363.46ms step:4895/6000 train_loss:3.4982 train_time:1775389ms step_avg:363.44ms step:4896/6000 train_loss:3.3728 train_time:1775643ms step_avg:363.41ms step:4897/6000 train_loss:3.3322 train_time:1775899ms step_avg:363.39ms step:4898/6000 train_loss:3.5207 train_time:1776152ms step_avg:363.37ms step:4899/6000 train_loss:3.2863 train_time:1776407ms step_avg:363.35ms step:4900/6000 train_loss:3.3383 train_time:1776661ms step_avg:363.33ms step:4901/6000 train_loss:3.2487 train_time:1777987ms step_avg:363.52ms 
step:4902/6000 train_loss:3.2162 train_time:1778239ms step_avg:363.50ms step:4903/6000 train_loss:3.3507 train_time:1778493ms step_avg:363.48ms step:4904/6000 train_loss:3.3009 train_time:1778748ms step_avg:363.45ms step:4905/6000 train_loss:3.3744 train_time:1779003ms step_avg:363.43ms step:4906/6000 train_loss:3.4082 train_time:1779257ms step_avg:363.41ms step:4907/6000 train_loss:3.2949 train_time:1779511ms step_avg:363.39ms step:4908/6000 train_loss:3.3784 train_time:1779765ms step_avg:363.37ms step:4909/6000 train_loss:3.2759 train_time:1780020ms step_avg:363.34ms step:4910/6000 train_loss:3.3892 train_time:1780275ms step_avg:363.32ms step:4911/6000 train_loss:3.4233 train_time:1781605ms step_avg:363.52ms step:4912/6000 train_loss:3.3368 train_time:1781855ms step_avg:363.50ms step:4913/6000 train_loss:3.2991 train_time:1782110ms step_avg:363.47ms step:4914/6000 train_loss:3.2943 train_time:1782508ms step_avg:363.48ms step:4915/6000 train_loss:3.2246 train_time:1782761ms step_avg:363.46ms step:4916/6000 train_loss:3.4262 train_time:1783015ms step_avg:363.44ms step:4917/6000 train_loss:3.4082 train_time:1783269ms step_avg:363.41ms step:4918/6000 train_loss:3.3241 train_time:1783524ms step_avg:363.39ms step:4919/6000 train_loss:3.3400 train_time:1783778ms step_avg:363.37ms step:4920/6000 train_loss:3.3438 train_time:1784034ms step_avg:363.35ms step:4921/6000 train_loss:3.4173 train_time:1785361ms step_avg:363.54ms step:4922/6000 train_loss:3.5804 train_time:1785613ms step_avg:363.52ms step:4923/6000 train_loss:3.4351 train_time:1785867ms step_avg:363.50ms step:4924/6000 train_loss:3.3038 train_time:1786121ms step_avg:363.48ms step:4925/6000 train_loss:3.6146 train_time:1786375ms step_avg:363.45ms step:4926/6000 train_loss:3.3535 train_time:1786630ms step_avg:363.43ms step:4927/6000 train_loss:3.3293 train_time:1786884ms step_avg:363.41ms step:4928/6000 train_loss:3.2485 train_time:1787139ms step_avg:363.39ms step:4929/6000 train_loss:3.2643 train_time:1787394ms 
step_avg:363.37ms step:4930/6000 train_loss:3.4085 train_time:1787649ms step_avg:363.34ms step:4931/6000 train_loss:3.6672 train_time:1788976ms step_avg:363.54ms step:4932/6000 train_loss:3.2627 train_time:1789228ms step_avg:363.52ms step:4933/6000 train_loss:3.3525 train_time:1789483ms step_avg:363.49ms step:4934/6000 train_loss:3.4218 train_time:1789737ms step_avg:363.47ms step:4935/6000 train_loss:3.2161 train_time:1789991ms step_avg:363.45ms step:4936/6000 train_loss:3.3587 train_time:1790245ms step_avg:363.43ms step:4937/6000 train_loss:3.4198 train_time:1790503ms step_avg:363.41ms step:4938/6000 train_loss:3.3999 train_time:1790759ms step_avg:363.38ms step:4939/6000 train_loss:3.3998 train_time:1791015ms step_avg:363.36ms step:4940/6000 train_loss:3.5160 train_time:1791433ms step_avg:363.37ms step:4941/6000 train_loss:3.3558 train_time:1792758ms step_avg:363.57ms step:4942/6000 train_loss:3.3664 train_time:1793012ms step_avg:363.55ms step:4943/6000 train_loss:3.1191 train_time:1793266ms step_avg:363.52ms step:4944/6000 train_loss:3.5988 train_time:1793520ms step_avg:363.50ms step:4945/6000 train_loss:3.5537 train_time:1793775ms step_avg:363.48ms step:4946/6000 train_loss:3.1863 train_time:1794030ms step_avg:363.46ms step:4947/6000 train_loss:3.4436 train_time:1794285ms step_avg:363.44ms step:4948/6000 train_loss:3.4575 train_time:1794541ms step_avg:363.41ms step:4949/6000 train_loss:3.2944 train_time:1794795ms step_avg:363.39ms step:4950/6000 train_loss:3.4250 train_time:1795049ms step_avg:363.37ms step:4951/6000 train_loss:3.2766 train_time:1796376ms step_avg:363.57ms step:4952/6000 train_loss:3.4080 train_time:1796631ms step_avg:363.54ms step:4953/6000 train_loss:3.3727 train_time:1796885ms step_avg:363.52ms step:4954/6000 train_loss:3.2521 train_time:1797139ms step_avg:363.50ms step:4955/6000 train_loss:3.3998 train_time:1797394ms step_avg:363.48ms step:4956/6000 train_loss:3.2248 train_time:1797650ms step_avg:363.46ms step:4957/6000 train_loss:3.3230 
train_time:1797905ms step_avg:363.43ms step:4958/6000 train_loss:3.3016 train_time:1798159ms step_avg:363.41ms step:4959/6000 train_loss:3.3245 train_time:1798414ms step_avg:363.39ms step:4960/6000 train_loss:3.3590 train_time:1798668ms step_avg:363.37ms step:4961/6000 train_loss:3.5023 train_time:1799995ms step_avg:363.56ms step:4962/6000 train_loss:3.2711 train_time:1800247ms step_avg:363.54ms step:4963/6000 train_loss:3.4060 train_time:1800502ms step_avg:363.52ms step:4964/6000 train_loss:3.2394 train_time:1800757ms step_avg:363.50ms step:4965/6000 train_loss:3.9660 train_time:1801011ms step_avg:363.47ms step:4966/6000 train_loss:3.2397 train_time:1801265ms step_avg:363.45ms step:4967/6000 train_loss:3.3750 train_time:1801519ms step_avg:363.43ms step:4968/6000 train_loss:3.1995 train_time:1801773ms step_avg:363.41ms step:4969/6000 train_loss:3.9114 train_time:1802028ms step_avg:363.39ms step:4970/6000 train_loss:3.4483 train_time:1802283ms step_avg:363.36ms step:4971/6000 train_loss:3.3609 train_time:1803610ms step_avg:363.56ms step:4972/6000 train_loss:3.3029 train_time:1803862ms step_avg:363.54ms step:4973/6000 train_loss:3.3924 train_time:1804117ms step_avg:363.51ms step:4974/6000 train_loss:3.2672 train_time:1804374ms step_avg:363.49ms step:4975/6000 train_loss:3.2593 train_time:1804630ms step_avg:363.47ms step:4976/6000 train_loss:3.4093 train_time:1804885ms step_avg:363.45ms step:4977/6000 train_loss:3.3343 train_time:1805140ms step_avg:363.43ms step:4978/6000 train_loss:3.2909 train_time:1805394ms step_avg:363.40ms step:4979/6000 train_loss:3.3501 train_time:1805649ms step_avg:363.38ms step:4980/6000 train_loss:3.2913 train_time:1805904ms step_avg:363.36ms step:4981/6000 train_loss:3.4303 train_time:1807230ms step_avg:363.55ms step:4982/6000 train_loss:3.4003 train_time:1807483ms step_avg:363.53ms step:4983/6000 train_loss:3.2002 train_time:1807737ms step_avg:363.51ms step:4984/6000 train_loss:3.2256 train_time:1807991ms step_avg:363.49ms step:4985/6000 
train_loss:3.5107 train_time:1808246ms step_avg:363.47ms step:4986/6000 train_loss:3.4068 train_time:1808500ms step_avg:363.44ms step:4987/6000 train_loss:3.3151 train_time:1808755ms step_avg:363.42ms step:4988/6000 train_loss:3.3485 train_time:1809011ms step_avg:363.40ms step:4989/6000 train_loss:3.3338 train_time:1809265ms step_avg:363.38ms step:4990/6000 train_loss:3.3194 train_time:1809520ms step_avg:363.36ms step:4991/6000 train_loss:3.3705 train_time:1810845ms step_avg:363.55ms step:4992/6000 train_loss:3.4029 train_time:1811097ms step_avg:363.53ms step:4993/6000 train_loss:3.2309 train_time:1811352ms step_avg:363.51ms step:4994/6000 train_loss:3.3667 train_time:1811606ms step_avg:363.48ms step:4995/6000 train_loss:3.2897 train_time:1811860ms step_avg:363.46ms step:4996/6000 train_loss:3.4322 train_time:1812115ms step_avg:363.44ms step:4997/6000 train_loss:3.3050 train_time:1812370ms step_avg:363.42ms step:4998/6000 train_loss:3.4628 train_time:1812625ms step_avg:363.40ms step:4999/6000 train_loss:3.3284 train_time:1812879ms step_avg:363.38ms step:5000/6000 train_loss:3.4494 train_time:1813135ms step_avg:363.35ms step:5000/6000 val_loss:3.3465 train_time:1813169ms step_avg:363.36ms step:5001/6000 train_loss:3.3769 train_time:1814464ms step_avg:363.55ms step:5002/6000 train_loss:3.3765 train_time:1814717ms step_avg:363.53ms step:5003/6000 train_loss:3.2630 train_time:1814972ms step_avg:363.50ms step:5004/6000 train_loss:3.3393 train_time:1815227ms step_avg:363.48ms step:5005/6000 train_loss:3.3498 train_time:1815482ms step_avg:363.46ms step:5006/6000 train_loss:3.2234 train_time:1815736ms step_avg:363.44ms step:5007/6000 train_loss:3.4623 train_time:1815991ms step_avg:363.42ms step:5008/6000 train_loss:3.3028 train_time:1816248ms step_avg:363.39ms step:5009/6000 train_loss:3.3390 train_time:1816503ms step_avg:363.37ms step:5010/6000 train_loss:3.3055 train_time:1816759ms step_avg:363.35ms step:5011/6000 train_loss:3.5072 train_time:1818085ms step_avg:363.54ms 
step:5012/6000 train_loss:3.3096 train_time:1818338ms step_avg:363.52ms step:5013/6000 train_loss:3.2926 train_time:1818592ms step_avg:363.50ms step:5014/6000 train_loss:3.2514 train_time:1818847ms step_avg:363.48ms step:5015/6000 train_loss:3.3550 train_time:1819100ms step_avg:363.46ms step:5016/6000 train_loss:3.3335 train_time:1819356ms step_avg:363.44ms step:5017/6000 train_loss:3.3717 train_time:1819610ms step_avg:363.41ms step:5018/6000 train_loss:3.3906 train_time:1819865ms step_avg:363.39ms step:5019/6000 train_loss:3.3546 train_time:1820120ms step_avg:363.37ms step:5020/6000 train_loss:3.8573 train_time:1820375ms step_avg:363.35ms step:5021/6000 train_loss:3.2921 train_time:1821701ms step_avg:363.54ms step:5022/6000 train_loss:3.4008 train_time:1821955ms step_avg:363.52ms step:5023/6000 train_loss:3.3321 train_time:1822210ms step_avg:363.50ms step:5024/6000 train_loss:3.4663 train_time:1822464ms step_avg:363.48ms step:5025/6000 train_loss:3.2632 train_time:1822718ms step_avg:363.45ms step:5026/6000 train_loss:3.4263 train_time:1822974ms step_avg:363.43ms step:5027/6000 train_loss:3.2779 train_time:1823228ms step_avg:363.41ms step:5028/6000 train_loss:3.4903 train_time:1823485ms step_avg:363.39ms step:5029/6000 train_loss:3.3914 train_time:1823738ms step_avg:363.37ms step:5030/6000 train_loss:3.4234 train_time:1823992ms step_avg:363.35ms step:5031/6000 train_loss:3.2739 train_time:1825319ms step_avg:363.54ms step:5032/6000 train_loss:3.3210 train_time:1825574ms step_avg:363.52ms step:5033/6000 train_loss:3.2613 train_time:1825829ms step_avg:363.49ms step:5034/6000 train_loss:3.4504 train_time:1826083ms step_avg:363.47ms step:5035/6000 train_loss:3.4420 train_time:1826337ms step_avg:363.45ms step:5036/6000 train_loss:3.3023 train_time:1826593ms step_avg:363.43ms step:5037/6000 train_loss:3.2231 train_time:1826849ms step_avg:363.41ms step:5038/6000 train_loss:3.2549 train_time:1827105ms step_avg:363.39ms step:5039/6000 train_loss:3.3941 train_time:1827361ms 
step_avg:363.36ms step:5040/6000 train_loss:3.3241 train_time:1827616ms step_avg:363.34ms step:5041/6000 train_loss:3.5040 train_time:1828942ms step_avg:363.53ms step:5042/6000 train_loss:3.2893 train_time:1829194ms step_avg:363.51ms step:5043/6000 train_loss:3.4709 train_time:1829449ms step_avg:363.49ms step:5044/6000 train_loss:3.3880 train_time:1829703ms step_avg:363.47ms step:5045/6000 train_loss:3.4480 train_time:1829959ms step_avg:363.45ms step:5046/6000 train_loss:3.2770 train_time:1830213ms step_avg:363.43ms step:5047/6000 train_loss:3.4104 train_time:1830469ms step_avg:363.40ms step:5048/6000 train_loss:3.1556 train_time:1830723ms step_avg:363.38ms step:5049/6000 train_loss:3.3235 train_time:1830978ms step_avg:363.36ms step:5050/6000 train_loss:3.3299 train_time:1831233ms step_avg:363.34ms step:5051/6000 train_loss:3.2721 train_time:1832561ms step_avg:363.53ms step:5052/6000 train_loss:3.3104 train_time:1832811ms step_avg:363.51ms step:5053/6000 train_loss:3.3513 train_time:1833066ms step_avg:363.49ms step:5054/6000 train_loss:3.3991 train_time:1833319ms step_avg:363.47ms step:5055/6000 train_loss:3.4799 train_time:1833575ms step_avg:363.44ms step:5056/6000 train_loss:3.4174 train_time:1833830ms step_avg:363.42ms step:5057/6000 train_loss:3.3041 train_time:1834085ms step_avg:363.40ms step:5058/6000 train_loss:3.1968 train_time:1834340ms step_avg:363.38ms step:5059/6000 train_loss:3.1130 train_time:1834596ms step_avg:363.36ms step:5060/6000 train_loss:3.3483 train_time:1834851ms step_avg:363.34ms step:5061/6000 train_loss:3.4362 train_time:1836178ms step_avg:363.53ms step:5062/6000 train_loss:3.3494 train_time:1836430ms step_avg:363.51ms step:5063/6000 train_loss:3.4684 train_time:1836684ms step_avg:363.48ms step:5064/6000 train_loss:3.4771 train_time:1836939ms step_avg:363.46ms step:5065/6000 train_loss:3.3183 train_time:1837192ms step_avg:363.44ms step:5066/6000 train_loss:3.4430 train_time:1837448ms step_avg:363.42ms step:5067/6000 train_loss:3.6392 
train_time:1837702ms step_avg:363.40ms step:5068/6000 train_loss:3.2950 train_time:1837956ms step_avg:363.38ms step:5069/6000 train_loss:3.6248 train_time:1838211ms step_avg:363.35ms step:5070/6000 train_loss:3.3518 train_time:1838466ms step_avg:363.33ms step:5071/6000 train_loss:3.7654 train_time:1839792ms step_avg:363.52ms step:5072/6000 train_loss:3.2953 train_time:1840047ms step_avg:363.50ms step:5073/6000 train_loss:3.3565 train_time:1840301ms step_avg:363.48ms step:5074/6000 train_loss:3.4837 train_time:1840556ms step_avg:363.46ms step:5075/6000 train_loss:3.3244 train_time:1840810ms step_avg:363.44ms step:5076/6000 train_loss:3.3142 train_time:1841064ms step_avg:363.42ms step:5077/6000 train_loss:3.2866 train_time:1841318ms step_avg:363.39ms step:5078/6000 train_loss:3.3592 train_time:1841575ms step_avg:363.37ms step:5079/6000 train_loss:3.5016 train_time:1841829ms step_avg:363.35ms step:5080/6000 train_loss:3.4745 train_time:1842085ms step_avg:363.33ms step:5081/6000 train_loss:3.2970 train_time:1843410ms step_avg:363.52ms step:5082/6000 train_loss:3.4239 train_time:1843664ms step_avg:363.50ms step:5083/6000 train_loss:3.2803 train_time:1843919ms step_avg:363.48ms step:5084/6000 train_loss:3.3623 train_time:1844173ms step_avg:363.46ms step:5085/6000 train_loss:3.2564 train_time:1844427ms step_avg:363.43ms step:5086/6000 train_loss:4.1089 train_time:1844682ms step_avg:363.41ms step:5087/6000 train_loss:3.3936 train_time:1844936ms step_avg:363.39ms step:5088/6000 train_loss:3.3071 train_time:1845191ms step_avg:363.37ms step:5089/6000 train_loss:3.2984 train_time:1845446ms step_avg:363.35ms step:5090/6000 train_loss:3.4515 train_time:1845700ms step_avg:363.33ms step:5091/6000 train_loss:3.3775 train_time:1847027ms step_avg:363.52ms step:5092/6000 train_loss:3.2726 train_time:1847279ms step_avg:363.49ms step:5093/6000 train_loss:3.3021 train_time:1847533ms step_avg:363.47ms step:5094/6000 train_loss:3.2931 train_time:1847789ms step_avg:363.45ms step:5095/6000 
train_loss:3.2115 train_time:1848044ms step_avg:363.43ms step:5096/6000 train_loss:3.3449 train_time:1848298ms step_avg:363.41ms step:5097/6000 train_loss:3.1242 train_time:1848554ms step_avg:363.39ms step:5098/6000 train_loss:3.4212 train_time:1848809ms step_avg:363.37ms step:5099/6000 train_loss:3.2795 train_time:1849064ms step_avg:363.35ms step:5100/6000 train_loss:3.3250 train_time:1849318ms step_avg:363.32ms step:5101/6000 train_loss:3.3131 train_time:1850645ms step_avg:363.51ms step:5102/6000 train_loss:3.2062 train_time:1850897ms step_avg:363.49ms step:5103/6000 train_loss:3.3625 train_time:1851292ms step_avg:363.50ms step:5104/6000 train_loss:3.3936 train_time:1851547ms step_avg:363.48ms step:5105/6000 train_loss:3.3895 train_time:1851802ms step_avg:363.45ms step:5106/6000 train_loss:3.2838 train_time:1852056ms step_avg:363.43ms step:5107/6000 train_loss:3.3826 train_time:1852310ms step_avg:363.41ms step:5108/6000 train_loss:3.4265 train_time:1852565ms step_avg:363.39ms step:5109/6000 train_loss:3.3298 train_time:1852820ms step_avg:363.37ms step:5110/6000 train_loss:3.3625 train_time:1853076ms step_avg:363.35ms step:5111/6000 train_loss:3.3745 train_time:1854402ms step_avg:363.54ms step:5112/6000 train_loss:3.3478 train_time:1854654ms step_avg:363.52ms step:5113/6000 train_loss:3.4123 train_time:1854910ms step_avg:363.49ms step:5114/6000 train_loss:3.2801 train_time:1855166ms step_avg:363.47ms step:5115/6000 train_loss:3.4502 train_time:1855420ms step_avg:363.45ms step:5116/6000 train_loss:3.2413 train_time:1855676ms step_avg:363.43ms step:5117/6000 train_loss:3.4807 train_time:1855930ms step_avg:363.41ms step:5118/6000 train_loss:3.3336 train_time:1856185ms step_avg:363.39ms step:5119/6000 train_loss:3.3400 train_time:1856440ms step_avg:363.37ms step:5120/6000 train_loss:3.3437 train_time:1856694ms step_avg:363.35ms step:5121/6000 train_loss:3.5515 train_time:1858021ms step_avg:363.53ms step:5122/6000 train_loss:3.3866 train_time:1858273ms 
step_avg:363.51ms step:5123/6000 train_loss:3.3403 train_time:1858527ms step_avg:363.49ms step:5124/6000 train_loss:3.3924 train_time:1858782ms step_avg:363.47ms step:5125/6000 train_loss:3.3282 train_time:1859036ms step_avg:363.45ms step:5125/6000 val_loss:3.3341 train_time:1859071ms step_avg:363.45ms step:5126/6000 train_loss:3.3705 train_time:1859296ms step_avg:363.43ms step:5127/6000 train_loss:3.2885 train_time:1859555ms step_avg:363.41ms step:5128/6000 train_loss:3.5212 train_time:1859811ms step_avg:363.39ms step:5129/6000 train_loss:3.2840 train_time:1860065ms step_avg:363.36ms step:5130/6000 train_loss:3.2993 train_time:1860474ms step_avg:363.37ms step:5131/6000 train_loss:3.3072 train_time:1861801ms step_avg:363.56ms step:5132/6000 train_loss:3.2087 train_time:1862054ms step_avg:363.54ms step:5133/6000 train_loss:3.1628 train_time:1862307ms step_avg:363.52ms step:5134/6000 train_loss:3.3969 train_time:1862562ms step_avg:363.50ms step:5135/6000 train_loss:3.2881 train_time:1862817ms step_avg:363.48ms step:5136/6000 train_loss:3.2981 train_time:1863072ms step_avg:363.46ms step:5137/6000 train_loss:3.2760 train_time:1863328ms step_avg:363.43ms step:5138/6000 train_loss:3.4381 train_time:1863582ms step_avg:363.41ms step:5139/6000 train_loss:3.2967 train_time:1863837ms step_avg:363.39ms step:5140/6000 train_loss:3.4515 train_time:1864092ms step_avg:363.37ms step:5141/6000 train_loss:3.2381 train_time:1865418ms step_avg:363.56ms step:5142/6000 train_loss:3.3602 train_time:1865669ms step_avg:363.54ms step:5143/6000 train_loss:3.2902 train_time:1865923ms step_avg:363.52ms step:5144/6000 train_loss:3.8935 train_time:1866178ms step_avg:363.49ms step:5145/6000 train_loss:3.6278 train_time:1866433ms step_avg:363.47ms step:5146/6000 train_loss:3.3362 train_time:1866689ms step_avg:363.45ms step:5147/6000 train_loss:3.2387 train_time:1866943ms step_avg:363.43ms step:5148/6000 train_loss:3.3068 train_time:1867198ms step_avg:363.41ms step:5149/6000 train_loss:3.5692 
train_time:1867453ms step_avg:363.39ms step:5150/6000 train_loss:3.2279 train_time:1867708ms step_avg:363.37ms step:5151/6000 train_loss:4.0160 train_time:1869035ms step_avg:363.55ms step:5152/6000 train_loss:3.3789 train_time:1869287ms step_avg:363.53ms step:5153/6000 train_loss:3.3618 train_time:1869541ms step_avg:363.51ms step:5154/6000 train_loss:3.3905 train_time:1869795ms step_avg:363.49ms step:5155/6000 train_loss:3.3421 train_time:1870050ms step_avg:363.47ms step:5156/6000 train_loss:3.2828 train_time:1870305ms step_avg:363.45ms step:5157/6000 train_loss:3.3142 train_time:1870560ms step_avg:363.43ms step:5158/6000 train_loss:3.3371 train_time:1870816ms step_avg:363.41ms step:5159/6000 train_loss:3.2343 train_time:1871070ms step_avg:363.39ms step:5160/6000 train_loss:3.4048 train_time:1871325ms step_avg:363.36ms step:5161/6000 train_loss:3.3269 train_time:1872652ms step_avg:363.55ms step:5162/6000 train_loss:3.4686 train_time:1872905ms step_avg:363.53ms step:5163/6000 train_loss:3.2059 train_time:1873158ms step_avg:363.51ms step:5164/6000 train_loss:3.5597 train_time:1873413ms step_avg:363.49ms step:5165/6000 train_loss:3.3423 train_time:1873667ms step_avg:363.47ms step:5166/6000 train_loss:3.3157 train_time:1873922ms step_avg:363.44ms step:5167/6000 train_loss:3.3564 train_time:1874176ms step_avg:363.42ms step:5168/6000 train_loss:3.2597 train_time:1874431ms step_avg:363.40ms step:5169/6000 train_loss:3.2693 train_time:1874685ms step_avg:363.38ms step:5170/6000 train_loss:3.3183 train_time:1874939ms step_avg:363.36ms step:5171/6000 train_loss:3.3531 train_time:1876266ms step_avg:363.55ms step:5172/6000 train_loss:3.4441 train_time:1876517ms step_avg:363.53ms step:5173/6000 train_loss:3.4006 train_time:1876771ms step_avg:363.50ms step:5174/6000 train_loss:3.3636 train_time:1877026ms step_avg:363.48ms step:5175/6000 train_loss:3.3361 train_time:1877281ms step_avg:363.46ms step:5176/6000 train_loss:3.3503 train_time:1877536ms step_avg:363.44ms step:5177/6000 
train_loss:3.3192 train_time:1877791ms step_avg:363.42ms step:5178/6000 train_loss:3.3010 train_time:1878045ms step_avg:363.40ms step:5179/6000 train_loss:3.3654 train_time:1878300ms step_avg:363.38ms step:5180/6000 train_loss:3.4144 train_time:1878554ms step_avg:363.36ms step:5181/6000 train_loss:3.3049 train_time:1879881ms step_avg:363.54ms step:5182/6000 train_loss:3.2842 train_time:1880133ms step_avg:363.52ms step:5183/6000 train_loss:3.3473 train_time:1880387ms step_avg:363.50ms step:5184/6000 train_loss:3.3292 train_time:1880641ms step_avg:363.48ms step:5185/6000 train_loss:3.2788 train_time:1880896ms step_avg:363.46ms step:5186/6000 train_loss:3.3256 train_time:1881150ms step_avg:363.44ms step:5187/6000 train_loss:3.3588 train_time:1881406ms step_avg:363.42ms step:5188/6000 train_loss:3.3309 train_time:1881659ms step_avg:363.39ms step:5189/6000 train_loss:3.4672 train_time:1881915ms step_avg:363.37ms step:5190/6000 train_loss:3.2661 train_time:1882169ms step_avg:363.35ms step:5191/6000 train_loss:3.3158 train_time:1883495ms step_avg:363.54ms step:5192/6000 train_loss:3.4089 train_time:1883747ms step_avg:363.52ms step:5193/6000 train_loss:3.4104 train_time:1884002ms step_avg:363.50ms step:5194/6000 train_loss:3.2799 train_time:1884256ms step_avg:363.48ms step:5195/6000 train_loss:3.3959 train_time:1884511ms step_avg:363.45ms step:5196/6000 train_loss:3.3563 train_time:1884765ms step_avg:363.43ms step:5197/6000 train_loss:3.3216 train_time:1885020ms step_avg:363.41ms step:5198/6000 train_loss:3.4872 train_time:1885274ms step_avg:363.39ms step:5199/6000 train_loss:3.3540 train_time:1885529ms step_avg:363.37ms step:5200/6000 train_loss:3.3335 train_time:1885784ms step_avg:363.35ms step:5201/6000 train_loss:3.2731 train_time:1887113ms step_avg:363.54ms step:5202/6000 train_loss:3.2622 train_time:1887364ms step_avg:363.51ms step:5203/6000 train_loss:3.1013 train_time:1887619ms step_avg:363.49ms step:5204/6000 train_loss:3.3206 train_time:1887872ms 
step_avg:363.47ms step:5205/6000 train_loss:3.1687 train_time:1888126ms step_avg:363.45ms step:5206/6000 train_loss:3.1827 train_time:1888380ms step_avg:363.43ms step:5207/6000 train_loss:3.4824 train_time:1888635ms step_avg:363.41ms step:5208/6000 train_loss:3.2835 train_time:1888891ms step_avg:363.39ms step:5209/6000 train_loss:3.5824 train_time:1889145ms step_avg:363.37ms step:5210/6000 train_loss:3.3754 train_time:1889400ms step_avg:363.35ms step:5211/6000 train_loss:3.4082 train_time:1890727ms step_avg:363.53ms step:5212/6000 train_loss:3.3322 train_time:1890979ms step_avg:363.51ms step:5213/6000 train_loss:3.2116 train_time:1891235ms step_avg:363.49ms step:5214/6000 train_loss:3.3685 train_time:1891489ms step_avg:363.47ms step:5215/6000 train_loss:3.2511 train_time:1891744ms step_avg:363.45ms step:5216/6000 train_loss:3.6103 train_time:1891999ms step_avg:363.43ms step:5217/6000 train_loss:3.3343 train_time:1892253ms step_avg:363.41ms step:5218/6000 train_loss:3.3941 train_time:1892508ms step_avg:363.38ms step:5219/6000 train_loss:3.3036 train_time:1892762ms step_avg:363.36ms step:5220/6000 train_loss:3.1690 train_time:1893021ms step_avg:363.34ms step:5221/6000 train_loss:3.5119 train_time:1894347ms step_avg:363.53ms step:5222/6000 train_loss:3.2309 train_time:1894599ms step_avg:363.51ms step:5223/6000 train_loss:3.6530 train_time:1894854ms step_avg:363.49ms step:5224/6000 train_loss:3.3097 train_time:1895109ms step_avg:363.47ms step:5225/6000 train_loss:3.3199 train_time:1895365ms step_avg:363.44ms step:5226/6000 train_loss:3.2601 train_time:1895619ms step_avg:363.42ms step:5227/6000 train_loss:3.3591 train_time:1895874ms step_avg:363.40ms step:5228/6000 train_loss:3.3581 train_time:1896130ms step_avg:363.38ms step:5229/6000 train_loss:3.2369 train_time:1896385ms step_avg:363.36ms step:5230/6000 train_loss:3.3645 train_time:1896639ms step_avg:363.34ms step:5231/6000 train_loss:3.4179 train_time:1897964ms step_avg:363.53ms step:5232/6000 train_loss:3.2434 
train_time:1898216ms step_avg:363.50ms step:5233/6000 train_loss:3.2689 train_time:1898470ms step_avg:363.48ms step:5234/6000 train_loss:3.3039 train_time:1898725ms step_avg:363.46ms step:5235/6000 train_loss:3.5405 train_time:1898980ms step_avg:363.44ms step:5236/6000 train_loss:3.3725 train_time:1899236ms step_avg:363.42ms step:5237/6000 train_loss:3.3255 train_time:1899492ms step_avg:363.40ms step:5238/6000 train_loss:3.2554 train_time:1899746ms step_avg:363.38ms step:5239/6000 train_loss:3.5205 train_time:1900000ms step_avg:363.36ms step:5240/6000 train_loss:3.2879 train_time:1900255ms step_avg:363.34ms step:5241/6000 train_loss:3.3653 train_time:1901581ms step_avg:363.52ms step:5242/6000 train_loss:3.2831 train_time:1901834ms step_avg:363.50ms step:5243/6000 train_loss:3.2385 train_time:1902089ms step_avg:363.48ms step:5244/6000 train_loss:3.3122 train_time:1902343ms step_avg:363.46ms step:5245/6000 train_loss:3.3525 train_time:1902597ms step_avg:363.44ms step:5246/6000 train_loss:3.3169 train_time:1902854ms step_avg:363.42ms step:5247/6000 train_loss:3.2692 train_time:1903110ms step_avg:363.40ms step:5248/6000 train_loss:3.1240 train_time:1903364ms step_avg:363.38ms step:5249/6000 train_loss:3.5002 train_time:1903618ms step_avg:363.36ms step:5250/6000 train_loss:3.3050 train_time:1903873ms step_avg:363.33ms step:5250/6000 val_loss:3.3242 train_time:1903908ms step_avg:363.34ms step:5251/6000 train_loss:3.3089 train_time:1905204ms step_avg:363.52ms step:5252/6000 train_loss:3.2463 train_time:1905457ms step_avg:363.50ms step:5253/6000 train_loss:3.4332 train_time:1905712ms step_avg:363.48ms step:5254/6000 train_loss:3.3239 train_time:1905967ms step_avg:363.46ms step:5255/6000 train_loss:3.1892 train_time:1906221ms step_avg:363.44ms step:5256/6000 train_loss:3.4213 train_time:1906476ms step_avg:363.42ms step:5257/6000 train_loss:3.3678 train_time:1906730ms step_avg:363.39ms step:5258/6000 train_loss:3.2018 train_time:1906986ms step_avg:363.37ms step:5259/6000 
train_loss:3.3593 train_time:1907241ms step_avg:363.35ms step:5260/6000 train_loss:3.2145 train_time:1907495ms step_avg:363.33ms step:5261/6000 train_loss:3.4261 train_time:1908821ms step_avg:363.52ms step:5262/6000 train_loss:3.2569 train_time:1909073ms step_avg:363.49ms step:5263/6000 train_loss:3.2978 train_time:1909328ms step_avg:363.47ms step:5264/6000 train_loss:3.2933 train_time:1909584ms step_avg:363.45ms step:5265/6000 train_loss:3.3311 train_time:1909839ms step_avg:363.43ms step:5266/6000 train_loss:3.3089 train_time:1910092ms step_avg:363.41ms step:5267/6000 train_loss:3.3365 train_time:1910347ms step_avg:363.39ms step:5268/6000 train_loss:3.2460 train_time:1910602ms step_avg:363.37ms step:5269/6000 train_loss:3.3777 train_time:1910855ms step_avg:363.35ms step:5270/6000 train_loss:3.2663 train_time:1911110ms step_avg:363.33ms step:5271/6000 train_loss:3.3716 train_time:1912438ms step_avg:363.51ms step:5272/6000 train_loss:3.3939 train_time:1912690ms step_avg:363.49ms step:5273/6000 train_loss:3.2424 train_time:1912944ms step_avg:363.47ms step:5274/6000 train_loss:3.3049 train_time:1913199ms step_avg:363.45ms step:5275/6000 train_loss:3.3865 train_time:1913454ms step_avg:363.43ms step:5276/6000 train_loss:3.4888 train_time:1913709ms step_avg:363.41ms step:5277/6000 train_loss:3.3327 train_time:1913964ms step_avg:363.39ms step:5278/6000 train_loss:3.2486 train_time:1914217ms step_avg:363.37ms step:5279/6000 train_loss:3.4444 train_time:1914473ms step_avg:363.35ms step:5280/6000 train_loss:3.4818 train_time:1914728ms step_avg:363.33ms step:5281/6000 train_loss:3.3054 train_time:1916056ms step_avg:363.51ms step:5282/6000 train_loss:3.1951 train_time:1916307ms step_avg:363.49ms step:5283/6000 train_loss:3.3207 train_time:1916563ms step_avg:363.47ms step:5284/6000 train_loss:3.3129 train_time:1916817ms step_avg:363.45ms step:5285/6000 train_loss:3.3249 train_time:1917072ms step_avg:363.43ms step:5286/6000 train_loss:3.3187 train_time:1917326ms 
step_avg:363.41ms step:5287/6000 train_loss:3.2286 train_time:1917580ms step_avg:363.38ms step:5288/6000 train_loss:3.2327 train_time:1917835ms step_avg:363.36ms step:5289/6000 train_loss:3.2288 train_time:1918089ms step_avg:363.34ms step:5290/6000 train_loss:3.3551 train_time:1918344ms step_avg:363.32ms step:5291/6000 train_loss:3.4770 train_time:1919671ms step_avg:363.51ms step:5292/6000 train_loss:3.3056 train_time:1920058ms step_avg:363.51ms step:5293/6000 train_loss:3.4138 train_time:1920313ms step_avg:363.49ms step:5294/6000 train_loss:3.3915 train_time:1920567ms step_avg:363.47ms step:5295/6000 train_loss:3.4317 train_time:1920821ms step_avg:363.45ms step:5296/6000 train_loss:3.2754 train_time:1921075ms step_avg:363.43ms step:5297/6000 train_loss:3.2971 train_time:1921330ms step_avg:363.41ms step:5298/6000 train_loss:3.2877 train_time:1921585ms step_avg:363.39ms step:5299/6000 train_loss:3.2405 train_time:1921841ms step_avg:363.37ms step:5300/6000 train_loss:3.3810 train_time:1922095ms step_avg:363.34ms step:5301/6000 train_loss:3.3390 train_time:1923420ms step_avg:363.53ms step:5302/6000 train_loss:3.2050 train_time:1923673ms step_avg:363.51ms step:5303/6000 train_loss:3.4222 train_time:1923928ms step_avg:363.49ms step:5304/6000 train_loss:3.1654 train_time:1924185ms step_avg:363.47ms step:5305/6000 train_loss:3.1764 train_time:1924439ms step_avg:363.44ms step:5306/6000 train_loss:3.3626 train_time:1924694ms step_avg:363.42ms step:5307/6000 train_loss:3.2569 train_time:1924948ms step_avg:363.40ms step:5308/6000 train_loss:3.2909 train_time:1925203ms step_avg:363.38ms step:5309/6000 train_loss:3.1217 train_time:1925460ms step_avg:363.36ms step:5310/6000 train_loss:3.2735 train_time:1925715ms step_avg:363.34ms step:5311/6000 train_loss:3.2027 train_time:1927041ms step_avg:363.52ms step:5312/6000 train_loss:3.5240 train_time:1927293ms step_avg:363.50ms step:5313/6000 train_loss:3.2875 train_time:1927548ms step_avg:363.48ms step:5314/6000 train_loss:3.2702 
train_time:1927801ms step_avg:363.46ms step:5315/6000 train_loss:3.4065 train_time:1928055ms step_avg:363.44ms step:5316/6000 train_loss:3.3468 train_time:1928310ms step_avg:363.42ms step:5317/6000 train_loss:3.5033 train_time:1928565ms step_avg:363.40ms step:5318/6000 train_loss:3.5136 train_time:1928821ms step_avg:363.38ms step:5319/6000 train_loss:3.9650 train_time:1929075ms step_avg:363.36ms step:5320/6000 train_loss:3.4981 train_time:1929481ms step_avg:363.37ms step:5321/6000 train_loss:3.2674 train_time:1930807ms step_avg:363.55ms step:5322/6000 train_loss:3.4841 train_time:1931060ms step_avg:363.53ms step:5323/6000 train_loss:3.4961 train_time:1931313ms step_avg:363.51ms step:5324/6000 train_loss:3.1913 train_time:1931568ms step_avg:363.49ms step:5325/6000 train_loss:3.1250 train_time:1931823ms step_avg:363.47ms step:5326/6000 train_loss:3.4169 train_time:1932079ms step_avg:363.45ms step:5327/6000 train_loss:3.2047 train_time:1932333ms step_avg:363.43ms step:5328/6000 train_loss:3.2689 train_time:1932587ms step_avg:363.40ms step:5329/6000 train_loss:3.2080 train_time:1932842ms step_avg:363.38ms step:5330/6000 train_loss:3.5241 train_time:1933096ms step_avg:363.36ms step:5331/6000 train_loss:3.2924 train_time:1934423ms step_avg:363.54ms step:5332/6000 train_loss:3.2545 train_time:1934674ms step_avg:363.52ms step:5333/6000 train_loss:3.3516 train_time:1934928ms step_avg:363.50ms step:5334/6000 train_loss:3.3403 train_time:1935183ms step_avg:363.48ms step:5335/6000 train_loss:4.6182 train_time:1935437ms step_avg:363.46ms step:5336/6000 train_loss:3.2940 train_time:1935691ms step_avg:363.44ms step:5337/6000 train_loss:3.4862 train_time:1935946ms step_avg:363.42ms step:5338/6000 train_loss:3.2854 train_time:1936201ms step_avg:363.40ms step:5339/6000 train_loss:3.2542 train_time:1936455ms step_avg:363.38ms step:5340/6000 train_loss:3.3630 train_time:1936709ms step_avg:363.36ms step:5341/6000 train_loss:3.2206 train_time:1938036ms step_avg:363.54ms step:5342/6000 
train_loss:3.2728 train_time:1938288ms step_avg:363.52ms step:5343/6000 train_loss:3.5088 train_time:1938543ms step_avg:363.50ms step:5344/6000 train_loss:3.2319 train_time:1938797ms step_avg:363.48ms step:5345/6000 train_loss:3.3707 train_time:1939052ms step_avg:363.46ms step:5346/6000 train_loss:3.3143 train_time:1939307ms step_avg:363.44ms step:5347/6000 train_loss:3.2637 train_time:1939563ms step_avg:363.42ms step:5348/6000 train_loss:3.2952 train_time:1939818ms step_avg:363.40ms step:5349/6000 train_loss:3.3060 train_time:1940073ms step_avg:363.38ms step:5350/6000 train_loss:3.3478 train_time:1940327ms step_avg:363.36ms step:5351/6000 train_loss:3.2557 train_time:1941655ms step_avg:363.54ms step:5352/6000 train_loss:3.3428 train_time:1941905ms step_avg:363.52ms step:5353/6000 train_loss:3.3284 train_time:1942160ms step_avg:363.50ms step:5354/6000 train_loss:3.2832 train_time:1942414ms step_avg:363.48ms step:5355/6000 train_loss:3.2855 train_time:1942668ms step_avg:363.46ms step:5356/6000 train_loss:3.3215 train_time:1942922ms step_avg:363.43ms step:5357/6000 train_loss:3.2005 train_time:1943176ms step_avg:363.41ms step:5358/6000 train_loss:3.4203 train_time:1943430ms step_avg:363.39ms step:5359/6000 train_loss:3.2496 train_time:1943685ms step_avg:363.37ms step:5360/6000 train_loss:3.3532 train_time:1943940ms step_avg:363.35ms step:5361/6000 train_loss:2.9392 train_time:1945266ms step_avg:363.53ms step:5362/6000 train_loss:3.2561 train_time:1945518ms step_avg:363.51ms step:5363/6000 train_loss:3.4274 train_time:1945772ms step_avg:363.49ms step:5364/6000 train_loss:3.3330 train_time:1946028ms step_avg:363.47ms step:5365/6000 train_loss:3.2495 train_time:1946282ms step_avg:363.45ms step:5366/6000 train_loss:3.1715 train_time:1946537ms step_avg:363.43ms step:5367/6000 train_loss:3.4352 train_time:1946791ms step_avg:363.41ms step:5368/6000 train_loss:3.3482 train_time:1947046ms step_avg:363.39ms step:5369/6000 train_loss:3.3630 train_time:1947301ms 
step_avg:363.37ms step:5370/6000 train_loss:3.2959 train_time:1947556ms step_avg:363.35ms step:5371/6000 train_loss:3.2345 train_time:1948882ms step_avg:363.53ms step:5372/6000 train_loss:3.2481 train_time:1949134ms step_avg:363.51ms step:5373/6000 train_loss:3.5237 train_time:1949388ms step_avg:363.49ms step:5374/6000 train_loss:3.2017 train_time:1949643ms step_avg:363.47ms step:5375/6000 train_loss:3.3257 train_time:1949897ms step_avg:363.45ms step:5375/6000 val_loss:3.3134 train_time:1949932ms step_avg:363.45ms step:5376/6000 train_loss:3.0315 train_time:1950157ms step_avg:363.43ms step:5377/6000 train_loss:3.3685 train_time:1950415ms step_avg:363.41ms step:5378/6000 train_loss:3.3108 train_time:1950671ms step_avg:363.39ms step:5379/6000 train_loss:3.3430 train_time:1950927ms step_avg:363.37ms step:5380/6000 train_loss:3.5155 train_time:1951186ms step_avg:363.35ms step:5381/6000 train_loss:3.2713 train_time:1952512ms step_avg:363.53ms step:5382/6000 train_loss:3.4306 train_time:1952764ms step_avg:363.51ms step:5383/6000 train_loss:3.6581 train_time:1953020ms step_avg:363.49ms step:5384/6000 train_loss:3.2351 train_time:1953274ms step_avg:363.47ms step:5385/6000 train_loss:3.3283 train_time:1953529ms step_avg:363.45ms step:5386/6000 train_loss:3.3044 train_time:1953785ms step_avg:363.43ms step:5387/6000 train_loss:3.3389 train_time:1954039ms step_avg:363.41ms step:5388/6000 train_loss:3.3591 train_time:1954294ms step_avg:363.39ms step:5389/6000 train_loss:3.3091 train_time:1954549ms step_avg:363.37ms step:5390/6000 train_loss:3.5195 train_time:1954805ms step_avg:363.35ms step:5391/6000 train_loss:3.2055 train_time:1956130ms step_avg:363.53ms step:5392/6000 train_loss:3.1834 train_time:1956382ms step_avg:363.50ms step:5393/6000 train_loss:3.2044 train_time:1956637ms step_avg:363.48ms step:5394/6000 train_loss:3.4483 train_time:1956891ms step_avg:363.46ms step:5395/6000 train_loss:3.3672 train_time:1957146ms step_avg:363.44ms step:5396/6000 train_loss:3.5733 
train_time:1957401ms step_avg:363.42ms step:5397/6000 train_loss:3.4495 train_time:1957656ms step_avg:363.40ms step:5398/6000 train_loss:3.2668 train_time:1957911ms step_avg:363.38ms step:5399/6000 train_loss:3.1970 train_time:1958166ms step_avg:363.36ms step:5400/6000 train_loss:3.5761 train_time:1958421ms step_avg:363.34ms step:5401/6000 train_loss:3.4498 train_time:1959748ms step_avg:363.52ms step:5402/6000 train_loss:3.3188 train_time:1960002ms step_avg:363.50ms step:5403/6000 train_loss:3.2998 train_time:1960255ms step_avg:363.48ms step:5404/6000 train_loss:3.2594 train_time:1960510ms step_avg:363.46ms step:5405/6000 train_loss:3.3430 train_time:1960764ms step_avg:363.44ms step:5406/6000 train_loss:3.3094 train_time:1961021ms step_avg:363.42ms step:5407/6000 train_loss:2.9727 train_time:1961275ms step_avg:363.40ms step:5408/6000 train_loss:3.4121 train_time:1961530ms step_avg:363.38ms step:5409/6000 train_loss:3.3012 train_time:1961787ms step_avg:363.36ms step:5410/6000 train_loss:3.2876 train_time:1962041ms step_avg:363.34ms step:5411/6000 train_loss:3.2893 train_time:1963368ms step_avg:363.52ms step:5412/6000 train_loss:3.3081 train_time:1963619ms step_avg:363.50ms step:5413/6000 train_loss:3.2745 train_time:1963874ms step_avg:363.48ms step:5414/6000 train_loss:3.3976 train_time:1964129ms step_avg:363.46ms step:5415/6000 train_loss:3.3234 train_time:1964383ms step_avg:363.44ms step:5416/6000 train_loss:3.3000 train_time:1964638ms step_avg:363.42ms step:5417/6000 train_loss:3.2471 train_time:1964892ms step_avg:363.40ms step:5418/6000 train_loss:3.3763 train_time:1965146ms step_avg:363.38ms step:5419/6000 train_loss:3.2566 train_time:1965402ms step_avg:363.36ms step:5420/6000 train_loss:3.4276 train_time:1965657ms step_avg:363.34ms step:5421/6000 train_loss:3.2788 train_time:1966984ms step_avg:363.52ms step:5422/6000 train_loss:3.3620 train_time:1967235ms step_avg:363.50ms step:5423/6000 train_loss:3.3603 train_time:1967490ms step_avg:363.48ms step:5424/6000 
train_loss:2.9383 train_time:1967747ms step_avg:363.46ms step:5425/6000 train_loss:3.3980 train_time:1968001ms step_avg:363.44ms step:5426/6000 train_loss:3.3208 train_time:1968254ms step_avg:363.41ms step:5427/6000 train_loss:3.3198 train_time:1968510ms step_avg:363.39ms step:5428/6000 train_loss:3.3305 train_time:1968765ms step_avg:363.37ms step:5429/6000 train_loss:3.2900 train_time:1969019ms step_avg:363.35ms step:5430/6000 train_loss:3.2877 train_time:1969274ms step_avg:363.33ms step:5431/6000 train_loss:3.3064 train_time:1970602ms step_avg:363.51ms step:5432/6000 train_loss:3.3023 train_time:1970855ms step_avg:363.49ms step:5433/6000 train_loss:3.3932 train_time:1971110ms step_avg:363.47ms step:5434/6000 train_loss:3.3833 train_time:1971363ms step_avg:363.45ms step:5435/6000 train_loss:3.3046 train_time:1971618ms step_avg:363.43ms step:5436/6000 train_loss:3.3376 train_time:1971873ms step_avg:363.41ms step:5437/6000 train_loss:3.2044 train_time:1972129ms step_avg:363.39ms step:5438/6000 train_loss:3.3420 train_time:1972384ms step_avg:363.37ms step:5439/6000 train_loss:3.4135 train_time:1972638ms step_avg:363.35ms step:5440/6000 train_loss:3.3442 train_time:1972893ms step_avg:363.33ms step:5441/6000 train_loss:3.2544 train_time:1974220ms step_avg:363.51ms step:5442/6000 train_loss:3.2685 train_time:1974470ms step_avg:363.49ms step:5443/6000 train_loss:3.1666 train_time:1974726ms step_avg:363.47ms step:5444/6000 train_loss:3.3419 train_time:1974984ms step_avg:363.45ms step:5445/6000 train_loss:3.2317 train_time:1975238ms step_avg:363.43ms step:5446/6000 train_loss:3.4824 train_time:1975493ms step_avg:363.41ms step:5447/6000 train_loss:3.2509 train_time:1975748ms step_avg:363.39ms step:5448/6000 train_loss:3.3484 train_time:1976002ms step_avg:363.37ms step:5449/6000 train_loss:3.3020 train_time:1976257ms step_avg:363.35ms step:5450/6000 train_loss:3.4200 train_time:1976512ms step_avg:363.33ms step:5451/6000 train_loss:3.4136 train_time:1977838ms 
step_avg:363.51ms step:5452/6000 train_loss:4.1216 train_time:1978090ms step_avg:363.49ms step:5453/6000 train_loss:3.3283 train_time:1978345ms step_avg:363.47ms step:5454/6000 train_loss:3.5133 train_time:1978600ms step_avg:363.45ms step:5455/6000 train_loss:3.2050 train_time:1978855ms step_avg:363.43ms step:5456/6000 train_loss:3.1436 train_time:1979110ms step_avg:363.41ms step:5457/6000 train_loss:3.2789 train_time:1979365ms step_avg:363.39ms step:5458/6000 train_loss:3.4637 train_time:1979619ms step_avg:363.37ms step:5459/6000 train_loss:3.3585 train_time:1979875ms step_avg:363.35ms step:5460/6000 train_loss:3.2584 train_time:1980130ms step_avg:363.33ms step:5461/6000 train_loss:3.2246 train_time:1981457ms step_avg:363.50ms step:5462/6000 train_loss:4.2097 train_time:1981709ms step_avg:363.48ms step:5463/6000 train_loss:3.1952 train_time:1981963ms step_avg:363.46ms step:5464/6000 train_loss:3.3915 train_time:1982216ms step_avg:363.44ms step:5465/6000 train_loss:3.3359 train_time:1982471ms step_avg:363.42ms step:5466/6000 train_loss:3.3060 train_time:1982726ms step_avg:363.40ms step:5467/6000 train_loss:3.4764 train_time:1982980ms step_avg:363.38ms step:5468/6000 train_loss:3.2619 train_time:1983236ms step_avg:363.36ms step:5469/6000 train_loss:3.1685 train_time:1983490ms step_avg:363.34ms step:5470/6000 train_loss:3.1319 train_time:1983745ms step_avg:363.32ms step:5471/6000 train_loss:3.2538 train_time:1985072ms step_avg:363.50ms step:5472/6000 train_loss:3.3220 train_time:1985324ms step_avg:363.48ms step:5473/6000 train_loss:3.2781 train_time:1985579ms step_avg:363.46ms step:5474/6000 train_loss:3.4992 train_time:1985833ms step_avg:363.44ms step:5475/6000 train_loss:3.4092 train_time:1986087ms step_avg:363.42ms step:5476/6000 train_loss:3.4625 train_time:1986341ms step_avg:363.40ms step:5477/6000 train_loss:3.4483 train_time:1986595ms step_avg:363.38ms step:5478/6000 train_loss:3.2223 train_time:1986848ms step_avg:363.36ms step:5479/6000 train_loss:3.3569 
train_time:1987103ms step_avg:363.34ms step:5480/6000 train_loss:3.3282 train_time:1987358ms step_avg:363.32ms step:5481/6000 train_loss:3.3814 train_time:1988818ms step_avg:363.52ms step:5482/6000 train_loss:3.4414 train_time:1989071ms step_avg:363.50ms step:5483/6000 train_loss:3.2806 train_time:1989327ms step_avg:363.48ms step:5484/6000 train_loss:3.3406 train_time:1989581ms step_avg:363.46ms step:5485/6000 train_loss:3.2786 train_time:1989834ms step_avg:363.44ms step:5486/6000 train_loss:3.3723 train_time:1990089ms step_avg:363.42ms step:5487/6000 train_loss:3.4209 train_time:1990343ms step_avg:363.40ms step:5488/6000 train_loss:3.2825 train_time:1990599ms step_avg:363.38ms step:5489/6000 train_loss:3.6145 train_time:1990853ms step_avg:363.36ms step:5490/6000 train_loss:3.3054 train_time:1991109ms step_avg:363.34ms step:5491/6000 train_loss:3.3688 train_time:1992436ms step_avg:363.52ms step:5492/6000 train_loss:3.4294 train_time:1992687ms step_avg:363.50ms step:5493/6000 train_loss:3.1854 train_time:1992942ms step_avg:363.48ms step:5494/6000 train_loss:3.4220 train_time:1993196ms step_avg:363.46ms step:5495/6000 train_loss:3.2779 train_time:1993450ms step_avg:363.44ms step:5496/6000 train_loss:3.2246 train_time:1993704ms step_avg:363.42ms step:5497/6000 train_loss:3.2378 train_time:1993959ms step_avg:363.40ms step:5498/6000 train_loss:3.2531 train_time:1994214ms step_avg:363.38ms step:5499/6000 train_loss:3.4282 train_time:1994468ms step_avg:363.36ms step:5500/6000 train_loss:3.3176 train_time:1994724ms step_avg:363.34ms step:5500/6000 val_loss:3.3048 train_time:1994758ms step_avg:363.34ms step:5501/6000 train_loss:3.3508 train_time:1996055ms step_avg:363.51ms step:5502/6000 train_loss:3.2605 train_time:1996308ms step_avg:363.49ms step:5503/6000 train_loss:3.4264 train_time:1996562ms step_avg:363.47ms step:5504/6000 train_loss:3.2276 train_time:1996818ms step_avg:363.45ms step:5505/6000 train_loss:3.7808 train_time:1997073ms step_avg:363.43ms step:5506/6000 
train_loss:3.3104 train_time:1997328ms step_avg:363.41ms step:5507/6000 train_loss:3.3349 train_time:1997583ms step_avg:363.40ms step:5508/6000 train_loss:3.2520 train_time:1997838ms step_avg:363.38ms step:5509/6000 train_loss:3.3211 train_time:1998092ms step_avg:363.36ms step:5510/6000 train_loss:3.3511 train_time:1998534ms step_avg:363.37ms step:5511/6000 train_loss:3.8088 train_time:1999860ms step_avg:363.54ms step:5512/6000 train_loss:3.4200 train_time:2000113ms step_avg:363.52ms step:5513/6000 train_loss:3.2960 train_time:2000366ms step_avg:363.50ms step:5514/6000 train_loss:3.3210 train_time:2000621ms step_avg:363.48ms step:5515/6000 train_loss:3.5302 train_time:2000877ms step_avg:363.47ms step:5516/6000 train_loss:3.2918 train_time:2001131ms step_avg:363.45ms step:5517/6000 train_loss:3.2738 train_time:2001385ms step_avg:363.43ms step:5518/6000 train_loss:3.3994 train_time:2001641ms step_avg:363.41ms step:5519/6000 train_loss:3.3279 train_time:2001896ms step_avg:363.39ms step:5520/6000 train_loss:3.2214 train_time:2002150ms step_avg:363.37ms step:5521/6000 train_loss:3.2956 train_time:2003477ms step_avg:363.54ms step:5522/6000 train_loss:3.3046 train_time:2003729ms step_avg:363.52ms step:5523/6000 train_loss:3.3888 train_time:2003984ms step_avg:363.50ms step:5524/6000 train_loss:3.3081 train_time:2004238ms step_avg:363.48ms step:5525/6000 train_loss:3.3537 train_time:2004493ms step_avg:363.46ms step:5526/6000 train_loss:3.4472 train_time:2004747ms step_avg:363.44ms step:5527/6000 train_loss:3.2816 train_time:2005002ms step_avg:363.42ms step:5528/6000 train_loss:3.2678 train_time:2005257ms step_avg:363.40ms step:5529/6000 train_loss:3.3310 train_time:2005511ms step_avg:363.38ms step:5530/6000 train_loss:3.3081 train_time:2005765ms step_avg:363.36ms step:5531/6000 train_loss:3.3017 train_time:2007092ms step_avg:363.54ms step:5532/6000 train_loss:3.3011 train_time:2007343ms step_avg:363.52ms step:5533/6000 train_loss:3.2298 train_time:2007599ms 
step_avg:363.50ms step:5534/6000 train_loss:3.2806 train_time:2007853ms step_avg:363.48ms step:5535/6000 train_loss:3.3615 train_time:2008107ms step_avg:363.46ms step:5536/6000 train_loss:3.2448 train_time:2008361ms step_avg:363.44ms step:5537/6000 train_loss:3.3754 train_time:2008617ms step_avg:363.42ms step:5538/6000 train_loss:3.3277 train_time:2008871ms step_avg:363.40ms step:5539/6000 train_loss:3.3268 train_time:2009126ms step_avg:363.38ms step:5540/6000 train_loss:3.4280 train_time:2009381ms step_avg:363.36ms step:5541/6000 train_loss:3.2850 train_time:2010708ms step_avg:363.53ms step:5542/6000 train_loss:3.2791 train_time:2010960ms step_avg:363.51ms step:5543/6000 train_loss:3.2265 train_time:2011213ms step_avg:363.49ms step:5544/6000 train_loss:3.4884 train_time:2011468ms step_avg:363.47ms step:5545/6000 train_loss:3.2715 train_time:2011723ms step_avg:363.45ms step:5546/6000 train_loss:3.4430 train_time:2011978ms step_avg:363.44ms step:5547/6000 train_loss:3.4149 train_time:2012232ms step_avg:363.42ms step:5548/6000 train_loss:3.2168 train_time:2012487ms step_avg:363.40ms step:5549/6000 train_loss:3.1756 train_time:2012742ms step_avg:363.38ms step:5550/6000 train_loss:3.9436 train_time:2012997ms step_avg:363.36ms step:5551/6000 train_loss:3.3828 train_time:2014326ms step_avg:363.53ms step:5552/6000 train_loss:3.2628 train_time:2014580ms step_avg:363.51ms step:5553/6000 train_loss:3.3240 train_time:2014833ms step_avg:363.49ms step:5554/6000 train_loss:3.2800 train_time:2015087ms step_avg:363.47ms step:5555/6000 train_loss:3.2729 train_time:2015340ms step_avg:363.45ms step:5556/6000 train_loss:3.1261 train_time:2015595ms step_avg:363.43ms step:5557/6000 train_loss:3.3176 train_time:2015850ms step_avg:363.41ms step:5558/6000 train_loss:3.3895 train_time:2016105ms step_avg:363.39ms step:5559/6000 train_loss:3.3165 train_time:2016360ms step_avg:363.37ms step:5560/6000 train_loss:3.4459 train_time:2016614ms step_avg:363.35ms step:5561/6000 train_loss:3.1693 
train_time:2017939ms step_avg:363.53ms step:5562/6000 train_loss:3.2084 train_time:2018193ms step_avg:363.51ms step:5563/6000 train_loss:3.3241 train_time:2018447ms step_avg:363.49ms step:5564/6000 train_loss:3.4047 train_time:2018702ms step_avg:363.47ms step:5565/6000 train_loss:3.5324 train_time:2018956ms step_avg:363.45ms step:5566/6000 train_loss:3.3004 train_time:2019211ms step_avg:363.43ms step:5567/6000 train_loss:3.4444 train_time:2019466ms step_avg:363.41ms step:5568/6000 train_loss:3.4925 train_time:2019720ms step_avg:363.39ms step:5569/6000 train_loss:3.2676 train_time:2019975ms step_avg:363.37ms step:5570/6000 train_loss:3.2776 train_time:2020229ms step_avg:363.35ms step:5571/6000 train_loss:3.3798 train_time:2021557ms step_avg:363.52ms step:5572/6000 train_loss:3.2228 train_time:2021809ms step_avg:363.50ms step:5573/6000 train_loss:3.5099 train_time:2022063ms step_avg:363.48ms step:5574/6000 train_loss:3.2209 train_time:2022318ms step_avg:363.46ms step:5575/6000 train_loss:3.3521 train_time:2022573ms step_avg:363.45ms step:5576/6000 train_loss:3.2888 train_time:2022826ms step_avg:363.43ms step:5577/6000 train_loss:3.3679 train_time:2023082ms step_avg:363.41ms step:5578/6000 train_loss:3.2614 train_time:2023336ms step_avg:363.39ms step:5579/6000 train_loss:3.2005 train_time:2023593ms step_avg:363.37ms step:5580/6000 train_loss:3.1379 train_time:2023847ms step_avg:363.35ms step:5581/6000 train_loss:3.3554 train_time:2025174ms step_avg:363.52ms step:5582/6000 train_loss:3.5855 train_time:2025425ms step_avg:363.50ms step:5583/6000 train_loss:3.3530 train_time:2025680ms step_avg:363.48ms step:5584/6000 train_loss:3.2719 train_time:2025934ms step_avg:363.46ms step:5585/6000 train_loss:3.3172 train_time:2026189ms step_avg:363.44ms step:5586/6000 train_loss:3.2905 train_time:2026445ms step_avg:363.42ms step:5587/6000 train_loss:3.2172 train_time:2026700ms step_avg:363.40ms step:5588/6000 train_loss:3.4168 train_time:2026954ms step_avg:363.38ms step:5589/6000 
train_loss:3.2972 train_time:2027208ms step_avg:363.36ms step:5590/6000 train_loss:3.3467 train_time:2027464ms step_avg:363.34ms step:5591/6000 train_loss:3.2211 train_time:2028791ms step_avg:363.52ms step:5592/6000 train_loss:3.2975 train_time:2029042ms step_avg:363.50ms step:5593/6000 train_loss:3.4005 train_time:2029298ms step_avg:363.48ms step:5594/6000 train_loss:3.1496 train_time:2029552ms step_avg:363.46ms step:5595/6000 train_loss:3.3510 train_time:2029807ms step_avg:363.44ms step:5596/6000 train_loss:3.3055 train_time:2030061ms step_avg:363.42ms step:5597/6000 train_loss:3.2820 train_time:2030317ms step_avg:363.40ms step:5598/6000 train_loss:3.5290 train_time:2030571ms step_avg:363.38ms step:5599/6000 train_loss:3.2691 train_time:2030825ms step_avg:363.36ms step:5600/6000 train_loss:3.2219 train_time:2031080ms step_avg:363.34ms step:5601/6000 train_loss:3.4180 train_time:2032407ms step_avg:363.51ms step:5602/6000 train_loss:3.3891 train_time:2032659ms step_avg:363.49ms step:5603/6000 train_loss:3.2626 train_time:2032914ms step_avg:363.47ms step:5604/6000 train_loss:3.2205 train_time:2033168ms step_avg:363.46ms step:5605/6000 train_loss:3.4433 train_time:2033422ms step_avg:363.44ms step:5606/6000 train_loss:3.3237 train_time:2033677ms step_avg:363.42ms step:5607/6000 train_loss:3.2155 train_time:2033931ms step_avg:363.40ms step:5608/6000 train_loss:3.3783 train_time:2034185ms step_avg:363.38ms step:5609/6000 train_loss:3.2492 train_time:2034441ms step_avg:363.36ms step:5610/6000 train_loss:3.3302 train_time:2034696ms step_avg:363.34ms step:5611/6000 train_loss:3.2651 train_time:2036022ms step_avg:363.51ms step:5612/6000 train_loss:3.1981 train_time:2036276ms step_avg:363.49ms step:5613/6000 train_loss:3.3527 train_time:2036531ms step_avg:363.47ms step:5614/6000 train_loss:3.3862 train_time:2036786ms step_avg:363.45ms step:5615/6000 train_loss:3.2416 train_time:2037040ms step_avg:363.43ms step:5616/6000 train_loss:3.3759 train_time:2037295ms 
step_avg:363.41ms step:5617/6000 train_loss:3.4976 train_time:2037550ms step_avg:363.39ms step:5618/6000 train_loss:3.2377 train_time:2037805ms step_avg:363.37ms step:5619/6000 train_loss:3.3451 train_time:2038060ms step_avg:363.36ms step:5620/6000 train_loss:3.3649 train_time:2038315ms step_avg:363.34ms step:5621/6000 train_loss:3.4808 train_time:2039641ms step_avg:363.51ms step:5622/6000 train_loss:3.3708 train_time:2039894ms step_avg:363.49ms step:5623/6000 train_loss:3.3362 train_time:2040149ms step_avg:363.47ms step:5624/6000 train_loss:3.3499 train_time:2040403ms step_avg:363.45ms step:5625/6000 train_loss:3.4532 train_time:2040657ms step_avg:363.43ms step:5625/6000 val_loss:3.2954 train_time:2040692ms step_avg:363.44ms step:5626/6000 train_loss:3.4031 train_time:2040917ms step_avg:363.41ms step:5627/6000 train_loss:3.4972 train_time:2041172ms step_avg:363.39ms step:5628/6000 train_loss:3.3172 train_time:2041427ms step_avg:363.37ms step:5629/6000 train_loss:3.2688 train_time:2041683ms step_avg:363.35ms step:5630/6000 train_loss:3.2575 train_time:2041938ms step_avg:363.33ms step:5631/6000 train_loss:3.2041 train_time:2043264ms step_avg:363.51ms step:5632/6000 train_loss:3.3107 train_time:2043517ms step_avg:363.49ms step:5633/6000 train_loss:3.3165 train_time:2043771ms step_avg:363.47ms step:5634/6000 train_loss:3.2669 train_time:2044026ms step_avg:363.45ms step:5635/6000 train_loss:3.4283 train_time:2044280ms step_avg:363.43ms step:5636/6000 train_loss:3.3857 train_time:2044536ms step_avg:363.41ms step:5637/6000 train_loss:3.3563 train_time:2044790ms step_avg:363.39ms step:5638/6000 train_loss:3.2873 train_time:2045046ms step_avg:363.37ms step:5639/6000 train_loss:3.2046 train_time:2045301ms step_avg:363.35ms step:5640/6000 train_loss:3.3643 train_time:2045556ms step_avg:363.33ms step:5641/6000 train_loss:3.0811 train_time:2046883ms step_avg:363.50ms step:5642/6000 train_loss:3.2758 train_time:2047138ms step_avg:363.48ms step:5643/6000 train_loss:3.2909 
train_time:2047392ms step_avg:363.46ms step:5644/6000 train_loss:3.2368 train_time:2047648ms step_avg:363.44ms step:5645/6000 train_loss:3.3981 train_time:2047903ms step_avg:363.43ms step:5646/6000 train_loss:3.1879 train_time:2048157ms step_avg:363.41ms step:5647/6000 train_loss:3.2109 train_time:2048411ms step_avg:363.39ms step:5648/6000 train_loss:3.1818 train_time:2048668ms step_avg:363.37ms step:5649/6000 train_loss:3.6277 train_time:2048923ms step_avg:363.35ms step:5650/6000 train_loss:3.3179 train_time:2049177ms step_avg:363.33ms step:5651/6000 train_loss:3.2365 train_time:2050503ms step_avg:363.50ms step:5652/6000 train_loss:3.2798 train_time:2050755ms step_avg:363.48ms step:5653/6000 train_loss:3.2542 train_time:2051009ms step_avg:363.46ms step:5654/6000 train_loss:3.2114 train_time:2051265ms step_avg:363.44ms step:5655/6000 train_loss:3.2825 train_time:2051519ms step_avg:363.42ms step:5656/6000 train_loss:3.3506 train_time:2051774ms step_avg:363.40ms step:5657/6000 train_loss:3.4934 train_time:2052030ms step_avg:363.38ms step:5658/6000 train_loss:4.0898 train_time:2052285ms step_avg:363.36ms step:5659/6000 train_loss:3.2178 train_time:2052540ms step_avg:363.35ms step:5660/6000 train_loss:3.4250 train_time:2052794ms step_avg:363.33ms step:5661/6000 train_loss:3.3529 train_time:2054122ms step_avg:363.50ms step:5662/6000 train_loss:3.2908 train_time:2054373ms step_avg:363.48ms step:5663/6000 train_loss:3.2902 train_time:2054627ms step_avg:363.46ms step:5664/6000 train_loss:3.1949 train_time:2054882ms step_avg:363.44ms step:5665/6000 train_loss:3.2327 train_time:2055136ms step_avg:363.42ms step:5666/6000 train_loss:3.3397 train_time:2055390ms step_avg:363.40ms step:5667/6000 train_loss:3.3383 train_time:2055645ms step_avg:363.38ms step:5668/6000 train_loss:3.3543 train_time:2055899ms step_avg:363.36ms step:5669/6000 train_loss:3.3222 train_time:2056156ms step_avg:363.34ms step:5670/6000 train_loss:3.1778 train_time:2056550ms step_avg:363.35ms step:5671/6000 
train_loss:3.3710 train_time:2057876ms step_avg:363.52ms step:5672/6000 train_loss:3.2975 train_time:2058127ms step_avg:363.50ms step:5673/6000 train_loss:3.2222 train_time:2058381ms step_avg:363.48ms step:5674/6000 train_loss:3.2771 train_time:2058636ms step_avg:363.46ms step:5675/6000 train_loss:3.2096 train_time:2058889ms step_avg:363.44ms step:5676/6000 train_loss:3.2125 train_time:2059145ms step_avg:363.42ms step:5677/6000 train_loss:3.3751 train_time:2059399ms step_avg:363.40ms step:5678/6000 train_loss:3.4390 train_time:2059653ms step_avg:363.38ms step:5679/6000 train_loss:3.2750 train_time:2059907ms step_avg:363.36ms step:5680/6000 train_loss:3.3666 train_time:2060164ms step_avg:363.34ms step:5681/6000 train_loss:3.3274 train_time:2061489ms step_avg:363.51ms step:5682/6000 train_loss:3.2868 train_time:2061744ms step_avg:363.50ms step:5683/6000 train_loss:3.2778 train_time:2061998ms step_avg:363.48ms step:5684/6000 train_loss:3.2617 train_time:2062254ms step_avg:363.46ms step:5685/6000 train_loss:3.6514 train_time:2062508ms step_avg:363.44ms step:5686/6000 train_loss:3.3798 train_time:2062764ms step_avg:363.42ms step:5687/6000 train_loss:3.3981 train_time:2063018ms step_avg:363.40ms step:5688/6000 train_loss:3.4480 train_time:2063272ms step_avg:363.38ms step:5689/6000 train_loss:3.3510 train_time:2063527ms step_avg:363.36ms step:5690/6000 train_loss:3.1891 train_time:2063783ms step_avg:363.34ms step:5691/6000 train_loss:3.3663 train_time:2065110ms step_avg:363.51ms step:5692/6000 train_loss:3.3514 train_time:2065364ms step_avg:363.49ms step:5693/6000 train_loss:3.3609 train_time:2065618ms step_avg:363.47ms step:5694/6000 train_loss:3.3041 train_time:2065872ms step_avg:363.45ms step:5695/6000 train_loss:3.2783 train_time:2066127ms step_avg:363.43ms step:5696/6000 train_loss:3.3762 train_time:2066381ms step_avg:363.42ms step:5697/6000 train_loss:3.3386 train_time:2066636ms step_avg:363.40ms step:5698/6000 train_loss:3.4729 train_time:2066890ms 
step_avg:363.38ms step:5699/6000 train_loss:3.4095 train_time:2067145ms step_avg:363.36ms step:5700/6000 train_loss:3.2482 train_time:2067552ms step_avg:363.37ms step:5701/6000 train_loss:3.3273 train_time:2068877ms step_avg:363.53ms step:5702/6000 train_loss:3.2492 train_time:2069132ms step_avg:363.52ms step:5703/6000 train_loss:3.2337 train_time:2069386ms step_avg:363.50ms step:5704/6000 train_loss:3.3402 train_time:2069641ms step_avg:363.48ms step:5705/6000 train_loss:3.5352 train_time:2069895ms step_avg:363.46ms step:5706/6000 train_loss:3.3710 train_time:2070150ms step_avg:363.44ms step:5707/6000 train_loss:3.2749 train_time:2070404ms step_avg:363.42ms step:5708/6000 train_loss:3.3249 train_time:2070660ms step_avg:363.40ms step:5709/6000 train_loss:3.2475 train_time:2070916ms step_avg:363.38ms step:5710/6000 train_loss:3.3221 train_time:2071170ms step_avg:363.36ms step:5711/6000 train_loss:3.3885 train_time:2072496ms step_avg:363.53ms step:5712/6000 train_loss:3.2161 train_time:2072750ms step_avg:363.51ms step:5713/6000 train_loss:3.0749 train_time:2073004ms step_avg:363.49ms step:5714/6000 train_loss:3.4239 train_time:2073258ms step_avg:363.47ms step:5715/6000 train_loss:3.5506 train_time:2073512ms step_avg:363.46ms step:5716/6000 train_loss:3.4379 train_time:2073768ms step_avg:363.44ms step:5717/6000 train_loss:3.2845 train_time:2074022ms step_avg:363.42ms step:5718/6000 train_loss:3.1748 train_time:2074276ms step_avg:363.40ms step:5719/6000 train_loss:3.3842 train_time:2074530ms step_avg:363.38ms step:5720/6000 train_loss:3.1569 train_time:2074785ms step_avg:363.36ms step:5721/6000 train_loss:3.2944 train_time:2076112ms step_avg:363.53ms step:5722/6000 train_loss:3.6735 train_time:2076365ms step_avg:363.51ms step:5723/6000 train_loss:3.3055 train_time:2076619ms step_avg:363.49ms step:5724/6000 train_loss:3.4599 train_time:2076874ms step_avg:363.47ms step:5725/6000 train_loss:3.3580 train_time:2077128ms step_avg:363.45ms step:5726/6000 train_loss:3.1966 
train_time:2077385ms step_avg:363.43ms step:5727/6000 train_loss:3.3630 train_time:2077641ms step_avg:363.41ms step:5728/6000 train_loss:3.2671 train_time:2077895ms step_avg:363.40ms step:5729/6000 train_loss:3.2614 train_time:2078151ms step_avg:363.38ms step:5730/6000 train_loss:3.3564 train_time:2078406ms step_avg:363.36ms step:5731/6000 train_loss:3.1914 train_time:2079732ms step_avg:363.53ms step:5732/6000 train_loss:3.3302 train_time:2079986ms step_avg:363.51ms step:5733/6000 train_loss:3.1129 train_time:2080239ms step_avg:363.49ms step:5734/6000 train_loss:3.2849 train_time:2080493ms step_avg:363.47ms step:5735/6000 train_loss:3.1636 train_time:2080748ms step_avg:363.45ms step:5736/6000 train_loss:3.2831 train_time:2081003ms step_avg:363.43ms step:5737/6000 train_loss:3.2982 train_time:2081257ms step_avg:363.41ms step:5738/6000 train_loss:3.2516 train_time:2081512ms step_avg:363.39ms step:5739/6000 train_loss:3.2278 train_time:2081768ms step_avg:363.37ms step:5740/6000 train_loss:3.1755 train_time:2082023ms step_avg:363.35ms step:5741/6000 train_loss:3.4001 train_time:2083349ms step_avg:363.52ms step:5742/6000 train_loss:3.2506 train_time:2083600ms step_avg:363.50ms step:5743/6000 train_loss:3.2767 train_time:2083855ms step_avg:363.48ms step:5744/6000 train_loss:3.1996 train_time:2084109ms step_avg:363.47ms step:5745/6000 train_loss:3.4026 train_time:2084365ms step_avg:363.45ms step:5746/6000 train_loss:3.3939 train_time:2084619ms step_avg:363.43ms step:5747/6000 train_loss:3.2779 train_time:2084872ms step_avg:363.41ms step:5748/6000 train_loss:3.4302 train_time:2085128ms step_avg:363.39ms step:5749/6000 train_loss:3.3452 train_time:2085383ms step_avg:363.37ms step:5750/6000 train_loss:3.3957 train_time:2085638ms step_avg:363.35ms step:5750/6000 val_loss:3.2877 train_time:2085673ms step_avg:363.36ms step:5751/6000 train_loss:3.3491 train_time:2086970ms step_avg:363.52ms step:5752/6000 train_loss:3.3492 train_time:2087221ms step_avg:363.50ms step:5753/6000 
train_loss:3.2948 train_time:2087475ms step_avg:363.48ms step:5754/6000 train_loss:3.3293 train_time:2087730ms step_avg:363.46ms step:5755/6000 train_loss:3.2950 train_time:2087984ms step_avg:363.44ms step:5756/6000 train_loss:3.1590 train_time:2088238ms step_avg:363.42ms step:5757/6000 train_loss:3.4011 train_time:2088494ms step_avg:363.41ms step:5758/6000 train_loss:3.3282 train_time:2088748ms step_avg:363.39ms step:5759/6000 train_loss:3.0620 train_time:2089003ms step_avg:363.37ms step:5760/6000 train_loss:3.2016 train_time:2089257ms step_avg:363.35ms step:5761/6000 train_loss:3.2495 train_time:2090583ms step_avg:363.52ms step:5762/6000 train_loss:3.3804 train_time:2090835ms step_avg:363.50ms step:5763/6000 train_loss:3.4236 train_time:2091090ms step_avg:363.48ms step:5764/6000 train_loss:3.4736 train_time:2091343ms step_avg:363.46ms step:5765/6000 train_loss:3.1952 train_time:2091598ms step_avg:363.44ms step:5766/6000 train_loss:3.2919 train_time:2091852ms step_avg:363.42ms step:5767/6000 train_loss:3.5944 train_time:2092106ms step_avg:363.40ms step:5768/6000 train_loss:3.2006 train_time:2092361ms step_avg:363.38ms step:5769/6000 train_loss:3.1968 train_time:2092615ms step_avg:363.36ms step:5770/6000 train_loss:3.3740 train_time:2092871ms step_avg:363.35ms step:5771/6000 train_loss:3.2846 train_time:2094198ms step_avg:363.51ms step:5772/6000 train_loss:3.3901 train_time:2094450ms step_avg:363.49ms step:5773/6000 train_loss:3.5408 train_time:2094704ms step_avg:363.47ms step:5774/6000 train_loss:3.5046 train_time:2094958ms step_avg:363.46ms step:5775/6000 train_loss:3.3894 train_time:2095212ms step_avg:363.44ms step:5776/6000 train_loss:3.3818 train_time:2095466ms step_avg:363.42ms step:5777/6000 train_loss:3.5492 train_time:2095720ms step_avg:363.40ms step:5778/6000 train_loss:3.3443 train_time:2095976ms step_avg:363.38ms step:5779/6000 train_loss:3.3209 train_time:2096231ms step_avg:363.36ms step:5780/6000 train_loss:3.3154 train_time:2096486ms 
step_avg:363.34ms step:5781/6000 train_loss:3.2355 train_time:2097812ms step_avg:363.51ms step:5782/6000 train_loss:3.4878 train_time:2098065ms step_avg:363.49ms step:5783/6000 train_loss:3.3028 train_time:2098318ms step_avg:363.47ms step:5784/6000 train_loss:3.4287 train_time:2098573ms step_avg:363.45ms step:5785/6000 train_loss:3.2133 train_time:2098826ms step_avg:363.43ms step:5786/6000 train_loss:3.2797 train_time:2099081ms step_avg:363.41ms step:5787/6000 train_loss:3.3690 train_time:2099335ms step_avg:363.40ms step:5788/6000 train_loss:3.4503 train_time:2099591ms step_avg:363.38ms step:5789/6000 train_loss:3.3086 train_time:2099845ms step_avg:363.36ms step:5790/6000 train_loss:3.1837 train_time:2100100ms step_avg:363.34ms step:5791/6000 train_loss:3.4834 train_time:2101427ms step_avg:363.51ms step:5792/6000 train_loss:3.2295 train_time:2101680ms step_avg:363.49ms step:5793/6000 train_loss:3.2445 train_time:2101933ms step_avg:363.47ms step:5794/6000 train_loss:3.2575 train_time:2102188ms step_avg:363.45ms step:5795/6000 train_loss:3.2487 train_time:2102442ms step_avg:363.43ms step:5796/6000 train_loss:3.2681 train_time:2102697ms step_avg:363.41ms step:5797/6000 train_loss:3.2468 train_time:2102951ms step_avg:363.39ms step:5798/6000 train_loss:3.4009 train_time:2103207ms step_avg:363.37ms step:5799/6000 train_loss:3.3220 train_time:2103461ms step_avg:363.35ms step:5800/6000 train_loss:3.2592 train_time:2103717ms step_avg:363.34ms step:5801/6000 train_loss:3.3289 train_time:2105043ms step_avg:363.50ms step:5802/6000 train_loss:3.3527 train_time:2105295ms step_avg:363.48ms step:5803/6000 train_loss:3.5774 train_time:2105549ms step_avg:363.46ms step:5804/6000 train_loss:3.4074 train_time:2105802ms step_avg:363.45ms step:5805/6000 train_loss:3.2748 train_time:2106056ms step_avg:363.43ms step:5806/6000 train_loss:3.1963 train_time:2106311ms step_avg:363.41ms step:5807/6000 train_loss:3.2547 train_time:2106568ms step_avg:363.39ms step:5808/6000 train_loss:3.4267 
train_time:2106821ms step_avg:363.37ms step:5809/6000 train_loss:3.1647 train_time:2107076ms step_avg:363.35ms step:5810/6000 train_loss:3.1052 train_time:2107331ms step_avg:363.33ms step:5811/6000 train_loss:3.2896 train_time:2108658ms step_avg:363.50ms step:5812/6000 train_loss:3.2636 train_time:2108909ms step_avg:363.48ms step:5813/6000 train_loss:3.2333 train_time:2109164ms step_avg:363.46ms step:5814/6000 train_loss:3.5535 train_time:2109417ms step_avg:363.44ms step:5815/6000 train_loss:3.2859 train_time:2109672ms step_avg:363.42ms step:5816/6000 train_loss:3.4492 train_time:2109926ms step_avg:363.40ms step:5817/6000 train_loss:3.4043 train_time:2110181ms step_avg:363.39ms step:5818/6000 train_loss:3.1334 train_time:2110436ms step_avg:363.37ms step:5819/6000 train_loss:3.3556 train_time:2110692ms step_avg:363.35ms step:5820/6000 train_loss:3.4242 train_time:2110946ms step_avg:363.33ms step:5821/6000 train_loss:3.3425 train_time:2112272ms step_avg:363.50ms step:5822/6000 train_loss:3.1940 train_time:2112524ms step_avg:363.48ms step:5823/6000 train_loss:3.2972 train_time:2112779ms step_avg:363.46ms step:5824/6000 train_loss:3.3441 train_time:2113032ms step_avg:363.44ms step:5825/6000 train_loss:3.2436 train_time:2113287ms step_avg:363.42ms step:5826/6000 train_loss:3.1592 train_time:2113541ms step_avg:363.40ms step:5827/6000 train_loss:3.1727 train_time:2113796ms step_avg:363.38ms step:5828/6000 train_loss:3.1910 train_time:2114051ms step_avg:363.36ms step:5829/6000 train_loss:3.2451 train_time:2114306ms step_avg:363.35ms step:5830/6000 train_loss:3.2683 train_time:2114562ms step_avg:363.33ms step:5831/6000 train_loss:3.3440 train_time:2115888ms step_avg:363.49ms step:5832/6000 train_loss:3.3805 train_time:2116140ms step_avg:363.47ms step:5833/6000 train_loss:3.1471 train_time:2116395ms step_avg:363.45ms step:5834/6000 train_loss:3.8443 train_time:2116649ms step_avg:363.44ms step:5835/6000 train_loss:3.4208 train_time:2116903ms step_avg:363.42ms step:5836/6000 
train_loss:3.3075 train_time:2117159ms step_avg:363.40ms step:5837/6000 train_loss:3.2650 train_time:2117414ms step_avg:363.38ms step:5838/6000 train_loss:3.3311 train_time:2117669ms step_avg:363.36ms step:5839/6000 train_loss:3.2501 train_time:2117924ms step_avg:363.34ms step:5840/6000 train_loss:3.2854 train_time:2118179ms step_avg:363.32ms step:5841/6000 train_loss:3.0542 train_time:2119504ms step_avg:363.49ms step:5842/6000 train_loss:3.3905 train_time:2119755ms step_avg:363.47ms step:5843/6000 train_loss:3.2351 train_time:2120011ms step_avg:363.45ms step:5844/6000 train_loss:3.5827 train_time:2120266ms step_avg:363.43ms step:5845/6000 train_loss:3.3635 train_time:2120520ms step_avg:363.41ms step:5846/6000 train_loss:3.3274 train_time:2120775ms step_avg:363.40ms step:5847/6000 train_loss:3.4431 train_time:2121030ms step_avg:363.38ms step:5848/6000 train_loss:3.4043 train_time:2121285ms step_avg:363.36ms step:5849/6000 train_loss:3.7787 train_time:2121540ms step_avg:363.34ms step:5850/6000 train_loss:3.3362 train_time:2121795ms step_avg:363.32ms step:5851/6000 train_loss:3.4075 train_time:2123121ms step_avg:363.49ms step:5852/6000 train_loss:3.3674 train_time:2123374ms step_avg:363.47ms step:5853/6000 train_loss:3.2875 train_time:2123628ms step_avg:363.45ms step:5854/6000 train_loss:3.2315 train_time:2123884ms step_avg:363.43ms step:5855/6000 train_loss:3.2023 train_time:2124138ms step_avg:363.41ms step:5856/6000 train_loss:3.3793 train_time:2124394ms step_avg:363.39ms step:5857/6000 train_loss:3.3207 train_time:2124649ms step_avg:363.37ms step:5858/6000 train_loss:3.3508 train_time:2124904ms step_avg:363.36ms step:5859/6000 train_loss:3.2560 train_time:2125303ms step_avg:363.36ms step:5860/6000 train_loss:3.4625 train_time:2125557ms step_avg:363.34ms step:5861/6000 train_loss:3.2154 train_time:2126885ms step_avg:363.51ms step:5862/6000 train_loss:3.1760 train_time:2127137ms step_avg:363.49ms step:5863/6000 train_loss:3.3803 train_time:2127391ms 
step_avg:363.47ms step:5864/6000 train_loss:3.2390 train_time:2127645ms step_avg:363.45ms step:5865/6000 train_loss:3.4195 train_time:2127900ms step_avg:363.43ms step:5866/6000 train_loss:3.2330 train_time:2128154ms step_avg:363.41ms step:5867/6000 train_loss:3.3531 train_time:2128410ms step_avg:363.40ms step:5868/6000 train_loss:3.2259 train_time:2128665ms step_avg:363.38ms step:5869/6000 train_loss:3.5031 train_time:2128919ms step_avg:363.36ms step:5870/6000 train_loss:3.2701 train_time:2129174ms step_avg:363.34ms step:5871/6000 train_loss:3.2045 train_time:2130501ms step_avg:363.50ms step:5872/6000 train_loss:3.3606 train_time:2130752ms step_avg:363.49ms step:5873/6000 train_loss:3.2813 train_time:2131007ms step_avg:363.47ms step:5874/6000 train_loss:3.2943 train_time:2131261ms step_avg:363.45ms step:5875/6000 train_loss:3.2466 train_time:2131516ms step_avg:363.43ms step:5875/6000 val_loss:3.2802 train_time:2131551ms step_avg:363.44ms step:5876/6000 train_loss:3.2713 train_time:2131776ms step_avg:363.41ms step:5877/6000 train_loss:3.3568 train_time:2132031ms step_avg:363.39ms step:5878/6000 train_loss:3.3376 train_time:2132287ms step_avg:363.38ms step:5879/6000 train_loss:3.3163 train_time:2132542ms step_avg:363.36ms step:5880/6000 train_loss:3.1226 train_time:2132796ms step_avg:363.34ms step:5881/6000 train_loss:3.2550 train_time:2134123ms step_avg:363.50ms step:5882/6000 train_loss:3.2937 train_time:2134375ms step_avg:363.48ms step:5883/6000 train_loss:3.3422 train_time:2134630ms step_avg:363.46ms step:5884/6000 train_loss:3.3698 train_time:2134885ms step_avg:363.45ms step:5885/6000 train_loss:3.3488 train_time:2135139ms step_avg:363.43ms step:5886/6000 train_loss:3.3016 train_time:2135395ms step_avg:363.41ms step:5887/6000 train_loss:3.5965 train_time:2135650ms step_avg:363.39ms step:5888/6000 train_loss:3.3937 train_time:2135904ms step_avg:363.37ms step:5889/6000 train_loss:3.3443 train_time:2136161ms step_avg:363.35ms step:5890/6000 train_loss:3.0604 
train_time:2136604ms step_avg:363.37ms step:5891/6000 train_loss:3.3444 train_time:2137930ms step_avg:363.53ms step:5892/6000 train_loss:3.2999 train_time:2138182ms step_avg:363.51ms step:5893/6000 train_loss:3.0347 train_time:2138436ms step_avg:363.49ms step:5894/6000 train_loss:3.1939 train_time:2138692ms step_avg:363.48ms step:5895/6000 train_loss:3.2929 train_time:2138945ms step_avg:363.46ms step:5896/6000 train_loss:3.5358 train_time:2139200ms step_avg:363.44ms step:5897/6000 train_loss:3.3032 train_time:2139455ms step_avg:363.42ms step:5898/6000 train_loss:3.2117 train_time:2139711ms step_avg:363.40ms step:5899/6000 train_loss:3.2786 train_time:2139966ms step_avg:363.38ms step:5900/6000 train_loss:3.1945 train_time:2140221ms step_avg:363.37ms step:5901/6000 train_loss:3.1522 train_time:2141548ms step_avg:363.53ms step:5902/6000 train_loss:3.2292 train_time:2141802ms step_avg:363.51ms step:5903/6000 train_loss:3.4157 train_time:2142057ms step_avg:363.49ms step:5904/6000 train_loss:3.3206 train_time:2142312ms step_avg:363.47ms step:5905/6000 train_loss:3.2773 train_time:2142567ms step_avg:363.46ms step:5906/6000 train_loss:3.4222 train_time:2142822ms step_avg:363.44ms step:5907/6000 train_loss:3.3586 train_time:2143077ms step_avg:363.42ms step:5908/6000 train_loss:3.2749 train_time:2143332ms step_avg:363.40ms step:5909/6000 train_loss:3.6967 train_time:2143587ms step_avg:363.38ms step:5910/6000 train_loss:3.5943 train_time:2143842ms step_avg:363.36ms step:5911/6000 train_loss:3.3069 train_time:2145168ms step_avg:363.53ms step:5912/6000 train_loss:3.2133 train_time:2145421ms step_avg:363.51ms step:5913/6000 train_loss:3.4518 train_time:2145676ms step_avg:363.49ms step:5914/6000 train_loss:3.2588 train_time:2145930ms step_avg:363.47ms step:5915/6000 train_loss:3.4114 train_time:2146185ms step_avg:363.45ms step:5916/6000 train_loss:3.2885 train_time:2146439ms step_avg:363.43ms step:5917/6000 train_loss:3.3499 train_time:2146694ms step_avg:363.42ms step:5918/6000 
train_loss:3.1889 train_time:2146950ms step_avg:363.40ms step:5919/6000 train_loss:3.3298 train_time:2147205ms step_avg:363.38ms step:5920/6000 train_loss:3.2154 train_time:2147459ms step_avg:363.36ms step:5921/6000 train_loss:3.4223 train_time:2148788ms step_avg:363.52ms step:5922/6000 train_loss:3.3066 train_time:2149039ms step_avg:363.50ms step:5923/6000 train_loss:3.3291 train_time:2149293ms step_avg:363.49ms step:5924/6000 train_loss:3.2313 train_time:2149547ms step_avg:363.47ms step:5925/6000 train_loss:3.3522 train_time:2149802ms step_avg:363.45ms step:5926/6000 train_loss:3.2173 train_time:2150056ms step_avg:363.43ms step:5927/6000 train_loss:3.2675 train_time:2150312ms step_avg:363.41ms step:5928/6000 train_loss:3.2890 train_time:2150567ms step_avg:363.39ms step:5929/6000 train_loss:3.4048 train_time:2150821ms step_avg:363.38ms step:5930/6000 train_loss:3.3076 train_time:2151077ms step_avg:363.36ms step:5931/6000 train_loss:3.4407 train_time:2152403ms step_avg:363.52ms step:5932/6000 train_loss:3.4324 train_time:2152655ms step_avg:363.50ms step:5933/6000 train_loss:3.0624 train_time:2152910ms step_avg:363.48ms step:5934/6000 train_loss:3.1877 train_time:2153164ms step_avg:363.46ms step:5935/6000 train_loss:3.2595 train_time:2153418ms step_avg:363.45ms step:5936/6000 train_loss:3.3234 train_time:2153673ms step_avg:363.43ms step:5937/6000 train_loss:3.1719 train_time:2153927ms step_avg:363.41ms step:5938/6000 train_loss:3.2617 train_time:2154182ms step_avg:363.39ms step:5939/6000 train_loss:3.3804 train_time:2154437ms step_avg:363.37ms step:5940/6000 train_loss:3.2783 train_time:2154692ms step_avg:363.35ms step:5941/6000 train_loss:3.2317 train_time:2156017ms step_avg:363.52ms step:5942/6000 train_loss:3.2183 train_time:2156271ms step_avg:363.50ms step:5943/6000 train_loss:3.2254 train_time:2156525ms step_avg:363.48ms step:5944/6000 train_loss:3.3290 train_time:2156779ms step_avg:363.46ms step:5945/6000 train_loss:3.2874 train_time:2157033ms 
step_avg:363.44ms step:5946/6000 train_loss:3.2972 train_time:2157289ms step_avg:363.42ms step:5947/6000 train_loss:3.1989 train_time:2157543ms step_avg:363.41ms step:5948/6000 train_loss:3.3570 train_time:2157797ms step_avg:363.39ms step:5949/6000 train_loss:3.4444 train_time:2158051ms step_avg:363.37ms step:5950/6000 train_loss:3.2354 train_time:2158307ms step_avg:363.35ms step:5951/6000 train_loss:3.2696 train_time:2159633ms step_avg:363.51ms step:5952/6000 train_loss:3.3096 train_time:2159887ms step_avg:363.49ms step:5953/6000 train_loss:3.4902 train_time:2160142ms step_avg:363.48ms step:5954/6000 train_loss:3.2948 train_time:2160396ms step_avg:363.46ms step:5955/6000 train_loss:3.1629 train_time:2160650ms step_avg:363.44ms step:5956/6000 train_loss:3.2288 train_time:2160903ms step_avg:363.42ms step:5957/6000 train_loss:3.2901 train_time:2161158ms step_avg:363.40ms step:5958/6000 train_loss:3.2393 train_time:2161413ms step_avg:363.38ms step:5959/6000 train_loss:3.3454 train_time:2161669ms step_avg:363.37ms step:5960/6000 train_loss:3.3234 train_time:2161923ms step_avg:363.35ms step:5961/6000 train_loss:3.3045 train_time:2163249ms step_avg:363.51ms step:5962/6000 train_loss:3.1265 train_time:2163502ms step_avg:363.49ms step:5963/6000 train_loss:3.2723 train_time:2163756ms step_avg:363.47ms step:5964/6000 train_loss:3.2813 train_time:2164014ms step_avg:363.46ms step:5965/6000 train_loss:3.1227 train_time:2164267ms step_avg:363.44ms step:5966/6000 train_loss:3.4700 train_time:2164522ms step_avg:363.42ms step:5967/6000 train_loss:3.1469 train_time:2164777ms step_avg:363.40ms step:5968/6000 train_loss:3.2958 train_time:2165031ms step_avg:363.38ms step:5969/6000 train_loss:3.2722 train_time:2165287ms step_avg:363.36ms step:5970/6000 train_loss:3.1803 train_time:2165542ms step_avg:363.35ms step:5971/6000 train_loss:3.1688 train_time:2166869ms step_avg:363.51ms step:5972/6000 train_loss:3.3001 train_time:2167120ms step_avg:363.49ms step:5973/6000 train_loss:3.4427 
train_time:2167374ms step_avg:363.47ms step:5974/6000 train_loss:3.3191 train_time:2167629ms step_avg:363.45ms step:5975/6000 train_loss:3.1818 train_time:2167883ms step_avg:363.43ms step:5976/6000 train_loss:3.2108 train_time:2168137ms step_avg:363.42ms step:5977/6000 train_loss:3.2586 train_time:2168393ms step_avg:363.40ms step:5978/6000 train_loss:3.2499 train_time:2168647ms step_avg:363.38ms step:5979/6000 train_loss:3.1944 train_time:2168902ms step_avg:363.36ms step:5980/6000 train_loss:3.3614 train_time:2169156ms step_avg:363.34ms step:5981/6000 train_loss:3.3818 train_time:2170484ms step_avg:363.50ms step:5982/6000 train_loss:3.2204 train_time:2170735ms step_avg:363.49ms step:5983/6000 train_loss:3.2301 train_time:2170990ms step_avg:363.47ms step:5984/6000 train_loss:3.3268 train_time:2171244ms step_avg:363.45ms step:5985/6000 train_loss:3.2783 train_time:2171498ms step_avg:363.43ms step:5986/6000 train_loss:3.1935 train_time:2171752ms step_avg:363.41ms step:5987/6000 train_loss:3.2191 train_time:2172009ms step_avg:363.39ms step:5988/6000 train_loss:3.2154 train_time:2172263ms step_avg:363.38ms step:5989/6000 train_loss:3.3095 train_time:2172517ms step_avg:363.36ms step:5990/6000 train_loss:3.5055 train_time:2172773ms step_avg:363.34ms step:5991/6000 train_loss:3.1354 train_time:2174102ms step_avg:363.50ms step:5992/6000 train_loss:3.4091 train_time:2174353ms step_avg:363.48ms step:5993/6000 train_loss:3.2385 train_time:2174608ms step_avg:363.46ms step:5994/6000 train_loss:3.2593 train_time:2174862ms step_avg:363.45ms step:5995/6000 train_loss:3.3224 train_time:2175116ms step_avg:363.43ms step:5996/6000 train_loss:3.3543 train_time:2175371ms step_avg:363.41ms step:5997/6000 train_loss:3.3433 train_time:2175625ms step_avg:363.39ms step:5998/6000 train_loss:3.2500 train_time:2175880ms step_avg:363.37ms step:5999/6000 train_loss:3.0936 train_time:2176134ms step_avg:363.36ms step:6000/6000 train_loss:3.4305 train_time:2176390ms step_avg:363.34ms step:6000/6000 
val_loss:3.2763 train_time:2176424ms step_avg:363.34ms