==================================================================================================== import os import sys with open(sys.argv[0]) as f: code = f.read() # read the code of this file ASAP, for logging import uuid import glob import time from dataclasses import dataclass import numpy as np import torch from torch import nn import torch.nn.functional as F import torch.distributed as dist import torch._inductor.config as config from torch.nn.parallel import DistributedDataParallel as DDP # ----------------------------------------------------------------------------- # Muon optimizer def zeropower_via_svd(G, steps=None): U, S, V = G.svd() return U @ V.T @torch.compile def zeropower_via_newtonschulz5(G, steps=10, eps=1e-7): """ Newton-Schulz iteration to compute the zeroth power / orthogonalization of G. We opt to use a quintic iteration whose coefficients are selected to maximize the slope at zero. For the purpose of minimizing steps, it turns out to be empirically effective to keep increasing the slope at zero even beyond the point where the iteration no longer converges all the way to one everywhere on the interval. This iteration therefore does not produce UV^T but rather something like US'V^T where S' is diagonal with S_{ii}' ~ Uniform(0.5, 1.5), which turns out not to hurt model performance at all relative to UV^T, where USV^T = G is the SVD. 
""" assert len(G.shape) == 2 a, b, c = (3.4445, -4.7750, 2.0315) X = G.bfloat16() X /= (X.norm() + eps) # ensure top singular value <= 1 if G.size(0) > G.size(1): X = X.T for _ in range(steps): A = X @ X.T B = b * A + c * A @ A # adapted from suggestion by @jxbz, @leloykun, and @YouJiacheng X = a * X + B @ X if G.size(0) > G.size(1): X = X.T return X zeropower_backends = dict(svd=zeropower_via_svd, newtonschulz5=zeropower_via_newtonschulz5) class Muon(torch.optim.Optimizer): """ Muon - MomentUm Orthogonalized by Newton-schulz Muon internally runs standard SGD-momentum, and then performs an orthogonalization post- processing step, in which each 2D parameter's update is replaced with the nearest orthogonal matrix. To efficiently orthogonalize each update, we use a Newton-Schulz iteration, which has the advantage that it can be stably run in bfloat16 on the GPU. Some warnings: - This optimizer assumes that all parameters passed in are 2D. - It should not be used for the embedding layer, the final fully connected layer, or any {0,1}-D parameters; those should all be optimized by a standard method (e.g., AdamW). - To use it with 4D convolutional filters, it works well to just flatten their last 3 dimensions. - We believe it is unlikely to work well for training with small batch size. - We believe it may not work well for finetuning pretrained models, but we haven't tested this. - We have not yet tried this optimizer for training scenarios larger than NanoGPT (124M). Arguments: lr: The learning rate used by the internal SGD. momentum: The momentum used by the internal SGD. nesterov: Whether to use Nesterov-style momentum in the internal SGD. (recommended) backend: The chosen backend for the orthogonalization step. (recommended: 'newtonschulz5') backend_steps: The number of iteration steps to use in the backend, if it is iterative. 
""" def __init__(self, params, lr=0.02, momentum=0.95, nesterov=True, backend='newtonschulz5', backend_steps=5): defaults = dict(lr=lr, momentum=momentum, nesterov=nesterov, backend=backend, backend_steps=backend_steps) super().__init__(params, defaults) def step(self): for group in self.param_groups: lr = group['lr'] momentum = group['momentum'] zeropower_backend = zeropower_backends[group['backend']] # generate weight updates in distributed fashion total_params = sum(p.numel() for p in group['params']) updates_flat = torch.zeros(total_params, device='cuda', dtype=torch.bfloat16) curr_idx = 0 for i, p in enumerate(group['params']): # luckily this will perfectly distribute a transformer with multiple of 4 layers to 8 GPUs if i % int(os.environ['WORLD_SIZE']) == int(os.environ['RANK']): g = p.grad assert g is not None state = self.state[p] if 'momentum_buffer' not in state: state['momentum_buffer'] = torch.zeros_like(g) buf = state['momentum_buffer'] buf.mul_(momentum).add_(g) if group['nesterov']: g = g.add(buf, alpha=momentum) g = zeropower_backend(g, steps=group['backend_steps']) g *= max(1, g.size(0)/g.size(1))**0.5 updates_flat[curr_idx:curr_idx+p.numel()] = g.flatten() curr_idx += p.numel() # sync updates across devices. 
we are not memory-constrained so can do this simple deserialization dist.all_reduce(updates_flat, op=dist.ReduceOp.SUM) # deserialize and apply updates curr_idx = 0 for p in group['params']: g = updates_flat[curr_idx:curr_idx+p.numel()].view_as(p.data).type_as(p.data) p.data.add_(g, alpha=-lr) curr_idx += p.numel() # ----------------------------------------------------------------------------- # PyTorch nn.Module definitions for the GPT-2 model class Rotary(torch.nn.Module): def __init__(self, dim, base=10000): super().__init__() self.dim = dim self.base = base self.inv_freq = None self.seq_len_cached = None self.cos_cached = None self.sin_cached = None def forward(self, x): seq_len = x.shape[1] if seq_len != self.seq_len_cached: self.inv_freq = 1.0 / (self.base ** (torch.arange(0, self.dim, 2, device=x.device).float() / self.dim)) self.seq_len_cached = seq_len t = torch.arange(seq_len, device=x.device).type_as(self.inv_freq) freqs = torch.outer(t, self.inv_freq) self.cos_cached = freqs.cos().bfloat16() self.sin_cached = freqs.sin().bfloat16() return self.cos_cached[None, :, None, :], self.sin_cached[None, :, None, :] def apply_rotary_emb(x, cos, sin): assert x.ndim == 4 # multihead attention d = x.shape[3]//2 x1 = x[..., :d] x2 = x[..., d:] y1 = x1 * cos + x2 * sin y2 = x1 * (-sin) + x2 * cos return torch.cat([y1, y2], 3).type_as(x) torch._dynamo.disable def cast_tensor(x, M, E, A): """ Rounds every value in the tensor x to the nearest representable floating point number. Where the floating point representation has M mantissa bits, smallest exponent A, and E exponent bits. 
Therefore (only considering positive numbers): * The subnormal numbers will be {0, 2**-M * 2**A), 2 * 2**-M * 2**A, ..., (2**M - 1) * 2**-M * 2**A} * The smallest denormal number will be 2**A * The largest denormal number will be 2**(A+2**E-2) * (2 - 2**-M) (So the (largest / smallest) denormal number ratio is roughly 2**(2**E-1)) Examples: * torch.half is M, E, A = 10, 5, -14; (modulo that the real format uses max exponent for NaN) * torch.float8_e5m2 is M, E, A = 2, 5, -14 (also modulo that the real format uses max exponent for NaN) * torch.float8_e4m3fn is M, E, A = 3, 4, -6 (modulo that the real format uses max denormal for NaN) * int8 is M, E, A = 7, 0, 7; this represents ±{0, 1, ..., 127}. (modulo that real int8 also has -128) * ternary weights are M, E, A = 0, 1, 0, this represents {-1, 0, +1}. * you could even have M, E, A = 0, 3, -2; this represents ±{0, 0.25, 0.5, 1, 2, 4, 8, 16}. In every case, the number of represented positive numbers is 2**(M+E). """ xp = x.detach().abs() mantissa, exponent = torch.frexp(xp) mantissa *= 2 # bring mantissa into the range [1, 2) instead of [0.5, 1) exponent -= 1 # assert (2**exponent * mantissa == xp).all(), x[2**exponent * mantissa != xp] # Round mantissa to given precision mantissa = (1 + 2**-M * ((mantissa - 1) * 2**M).round()) # Handle subnormals separately mask = (exponent < A) mantissa[mask] = (xp[mask] * 2**(M-A)).round() / 2**M exponent[mask] = A mask = (mantissa == 2) mantissa[mask] = 1 exponent[mask] += 1 # Truncate top of range if E > 0: B = A+2**E-2 mask = (exponent > B) mantissa[mask] = 2 - 2**-M exponent[mask] = B else: # zero-bit exponent case: so we have only subnormal numbers (like an int8) mask = (mantissa >= 1) mantissa[mask] = 1 - 2**-M exponent[mask] = A y = x.clone() y.data.copy_(x.sign() * exponent.exp2() * mantissa) return y MEA_WEIGHT = (0, 3, -6) class CastedLinear(nn.Linear): def forward(self, x): w = self.weight if len(w) < 10000: # skip the lm_head #s = (w.data.abs().mean() + 1e-5) s = 1 w = 
s * cast_tensor(self.weight / s, *MEA_WEIGHT) return F.linear(x, w.to(x.dtype)) #return cast_tensor(F.linear(x, w), MEA_ACTIVATION) class CausalSelfAttention(nn.Module): def __init__(self, config): super().__init__() self.n_head = config.n_head self.n_embd = config.n_embd self.head_dim = self.n_embd // self.n_head assert self.n_embd % self.n_head == 0 self.c_q = CastedLinear(self.n_embd, self.n_embd, bias=False) self.c_k = CastedLinear(self.n_embd, self.n_embd, bias=False) self.c_v = CastedLinear(self.n_embd, self.n_embd, bias=False) # output projection self.c_proj = CastedLinear(self.n_embd, self.n_embd, bias=False) self.c_proj.weight.data.zero_() # zero init suggested by @Grad62304977 self.rotary = Rotary(self.head_dim) self.lamb = nn.Parameter(torch.tensor(0.5)) # @Grad62304977 def forward(self, x, v1=None): B, T, C = x.size() # batch size, sequence length, embedding dimensionality (n_embd) q = self.c_q(x).view(B, T, self.n_head, self.head_dim) k = self.c_k(x).view(B, T, self.n_head, self.head_dim) v = self.c_v(x).view(B, T, self.n_head, self.head_dim) if v1 is None: v1 = v # This happens if we are in the first block. 
v needs to be accessed by subsequent blocks v = (1 - self.lamb) * v + self.lamb * v1.view_as(v) # @Grad62304977 cos, sin = self.rotary(q) q, k = F.rms_norm(q, (q.size(-1),)), F.rms_norm(k, (k.size(-1),)) # QK norm suggested by @Grad62304977 q, k = apply_rotary_emb(q, cos, sin), apply_rotary_emb(k, cos, sin) y = F.scaled_dot_product_attention(q.transpose(1, 2), k.transpose(1, 2), v.transpose(1, 2), is_causal=True) y = y.transpose(1, 2).contiguous().view_as(x) # re-assemble all head outputs side by side y = self.c_proj(y) return y, v1 class MLP(nn.Module): def __init__(self, config): super().__init__() self.c_fc = CastedLinear(config.n_embd, 4 * config.n_embd, bias=False) self.c_proj = CastedLinear(4 * config.n_embd, config.n_embd, bias=False) self.c_proj.weight.data.zero_() # zero init suggested by @Grad62304977 def forward(self, x): x = self.c_fc(x) x = F.relu(x).square() # https://arxiv.org/abs/2109.08668v2; ~1-2% better than GELU; suggested by @SKYLINEZ007 and @Grad62304977 x = self.c_proj(x) return x class Block(nn.Module): def __init__(self, config): super().__init__() self.attn = CausalSelfAttention(config) self.mlp = MLP(config) self.lambdas = nn.Parameter(torch.tensor([1., 0.])) def forward(self, x, v1, x0): x = self.lambdas[0] * x + self.lambdas[1] * x0 x1, v1 = self.attn(F.rms_norm(x, (x.size(-1),)), v1) x = x + x1 x = x + self.mlp(F.rms_norm(x, (x.size(-1),))) return x, v1 # ----------------------------------------------------------------------------- # The main GPT-2 model @dataclass class GPTConfig: vocab_size : int = 50304 n_layer : int = 12 n_head : int = 6 # head dim 128 suggested by @Grad62304977 n_embd : int = 768 class GPT(nn.Module): def __init__(self, config): super().__init__() self.config = config self.transformer = nn.ModuleDict(dict( wte = nn.Embedding(config.vocab_size, config.n_embd), h = nn.ModuleList([Block(config) for _ in range(config.n_layer)]), )) self.lm_head = CastedLinear(config.n_embd, config.vocab_size, bias=False) 
self.lm_head.weight.data.zero_() # @Grad62304977 def forward(self, idx, target): # forward the GPT model itself x = self.transformer.wte(idx) # token embeddings of shape (b, t, n_embd) x = F.rms_norm(x, (x.size(-1),)) # @Grad62304977 x0 = x v1 = None for block in self.transformer.h: x, v1 = block(x, v1, x0) x = F.rms_norm(x, (x.size(-1),)) logits = self.lm_head(x) logits = 30 * torch.tanh(logits / 30) # @Grad62304977 logits = logits.float() loss = F.cross_entropy(logits.view(-1, logits.size(-1)), target.view(-1)) return loss.float() # ----------------------------------------------------------------------------- # Our own simple Distributed Data Loader def _peek_data_shard(filename): # only reads the header, returns header data with open(filename, "rb") as f: # first read the header, which is 256 int32 integers (4 bytes each) header = np.frombuffer(f.read(256*4), dtype=np.int32) if header[0] != 20240520: print("ERROR: magic number mismatch in the data .bin file!") print("---> HINT: Are you passing in a correct file with --input_bin?") print("---> HINT: Dataset encoding changed recently, re-run data prepro or refer again to README") print("---> HINT: For example re-run: `python dev/data/tinyshakespeare.py`, then re-try") exit(1) assert header[1] == 1, "unsupported version" ntok = header[2] # number of tokens (claimed) return ntok # for now just return the number of tokens def _load_data_shard(filename): with open(filename, "rb") as f: # first read the header, which is 256 int32 integers (4 bytes each) header = np.frombuffer(f.read(256*4), dtype=np.int32) assert header[0] == 20240520, "magic number mismatch in the data .bin file" assert header[1] == 1, "unsupported version" ntok = header[2] # number of tokens (claimed) # the rest of it are tokens, stored as uint16 tokens = np.frombuffer(f.read(), dtype=np.uint16) assert len(tokens) == ntok, "number of tokens read does not match header?" 
return tokens class DistributedDataLoader: def __init__(self, filename_pattern, B, T, process_rank, num_processes): self.process_rank = process_rank self.num_processes = num_processes self.B = B self.T = T # glob files that match the pattern self.files = sorted(glob.glob(filename_pattern)) assert len(self.files) > 0, f"did not find any files that match the pattern {filename_pattern}" # load and validate all data shards, count number of tokens in total ntok_total = 0 for fname in self.files: shard_ntok = _peek_data_shard(fname) assert shard_ntok >= num_processes * B * T + 1 ntok_total += int(shard_ntok) self.ntok_total = ntok_total # kick things off self.reset() def reset(self): self.current_shard = 0 self.current_position = self.process_rank * self.B * self.T self.tokens = _load_data_shard(self.files[self.current_shard]) def advance(self): # advance to next data shard self.current_shard = (self.current_shard + 1) % len(self.files) self.current_position = self.process_rank * self.B * self.T self.tokens = _load_data_shard(self.files[self.current_shard]) def next_batch(self): B = self.B T = self.T buf = self.tokens[self.current_position : self.current_position+B*T+1] buf = torch.tensor(buf.astype(np.int32), dtype=torch.long) x = (buf[:-1]).view(B, T) # inputs y = (buf[1:]).view(B, T) # targets # advance current position and load next shard if necessary self.current_position += B * T * self.num_processes if self.current_position + (B * T * self.num_processes + 1) > len(self.tokens): self.advance() return x.cuda(), y.cuda() # ----------------------------------------------------------------------------- # int main @dataclass class Hyperparameters: # data hyperparams input_bin : str = 'data/fineweb10B/fineweb_train_*.bin' # input .bin to train on input_val_bin : str = 'data/fineweb10B/fineweb_val_*.bin' # input .bin to eval validation loss on # optimization hyperparams batch_size : int = 8*64 # batch size, in sequences, across all devices device_batch_size : int = 32 # 
batch size, in sequences, per device sequence_length : int = 1024 # sequence length, in tokens num_iterations : int = 3242 # number of iterations to run warmup_iters : int = 0 warmdown_iters : int = 926 # number of iterations of linear warmup/warmdown for triangular or trapezoidal schedule weight_decay : float = 0 # evaluation and logging hyperparams val_loss_every : int = 125 # every how many steps to evaluate val loss? 0 for only at the end val_tokens : int = 10485760 # how many tokens of validation data? it's important to keep this fixed for consistent comparisons save_every : int = 0 # every how many steps to save the checkpoint? 0 for only at the end args = Hyperparameters() # set up DDP (distributed data parallel). torchrun sets this env variable assert torch.cuda.is_available() dist.init_process_group(backend='nccl') ddp_rank = int(os.environ['RANK']) ddp_local_rank = int(os.environ['LOCAL_RANK']) ddp_world_size = int(os.environ['WORLD_SIZE']) device = f'cuda:{ddp_local_rank}' torch.cuda.set_device(device) print(f"using device: {device}") master_process = (ddp_rank == 0) # this process will do logging, checkpointing etc. 
# begin logging
logfile = None
if master_process:
    run_id = str(uuid.uuid4())
    logdir = 'logs/%s/' % run_id
    os.makedirs(logdir, exist_ok=True)
    logfile = 'logs/%s.txt' % run_id
    # create the log file
    with open(logfile, "w") as f:
        # begin the log by printing this file (the Python code)
        f.write('='*100 + '\n')
        f.write(code)
        f.write('='*100 + '\n')
def print0(s, logonly=False):
    """On the master process, append `s` to the log file and, unless `logonly`, also print it."""
    if master_process:
        with open(logfile, "a") as f:
            if not logonly:
                print(s)
            f.write(s+'\n')
# log information about the hardware/software environment this is running on
# and print the full `nvidia-smi` to file
print0(f"Running pytorch {torch.version.__version__} compiled for CUDA {torch.version.cuda}\nnvidia-smi:")
import subprocess
result = subprocess.run(['nvidia-smi'], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
print0(f'{result.stdout}', logonly=True)
print0('='*100, logonly=True)

# convenience variables
B, T = args.device_batch_size, args.sequence_length
# calculate the number of steps to take in the val loop.
assert args.val_tokens % (B * T * ddp_world_size) == 0
val_steps = args.val_tokens // (B * T * ddp_world_size)
# calculate the steps of gradient accumulation required to attain the desired global batch size.
assert args.batch_size % (B * ddp_world_size) == 0
train_accumulation_steps = args.batch_size // (B * ddp_world_size)

# load tokens
train_loader = DistributedDataLoader(args.input_bin, B, T, ddp_rank, ddp_world_size)
val_loader = DistributedDataLoader(args.input_val_bin, B, T, ddp_rank, ddp_world_size)
print0(f"Training DataLoader: total number of tokens: {train_loader.ntok_total} across {len(train_loader.files)} files")
print0(f"Validation DataLoader: total number of tokens: {val_loader.ntok_total} across {len(val_loader.files)} files")
print0('='*100, logonly=True)
# fetch the first training batch up front; the training loop consumes (x, y) before loading the next one
x, y = train_loader.next_batch()

# there are only 50257 unique GPT-2 tokens; we extend to nearest multiple of 128 for efficiency. suggested to me by @Grad62304977.
# this originates from Karpathy's experiments.
num_vocab = 50304
model = GPT(GPTConfig(vocab_size=num_vocab, n_layer=12, n_head=8, n_embd=1024))
model = model.cuda().bfloat16()
# the CastedLinear layers are converted back to float32 after the bfloat16 cast of the whole model
for m in model.modules():
    if isinstance(m, CastedLinear):
        m.float()
if hasattr(config, "coordinate_descent_tuning"):
    config.coordinate_descent_tuning = True # suggested by @Chillee
model = torch.compile(model)
# here we wrap model into DDP container
model = DDP(model, device_ids=[ddp_local_rank])
raw_model = model.module # always contains the "raw" unwrapped model

# CUDNN attention is ~4ms faster than Flash, but doesn't get selected by default in PyTorch 2.5.1
from torch.backends.cuda import enable_cudnn_sdp, enable_flash_sdp, enable_math_sdp, enable_mem_efficient_sdp
enable_cudnn_sdp(True)
enable_flash_sdp(False)
enable_mem_efficient_sdp(False)
enable_math_sdp(False)

# init the optimizer(s)
optimizer1 = torch.optim.Adam([raw_model.transformer.wte.weight], lr=0.3,   betas=(0.9, 0.95), fused=True)
optimizer2 = torch.optim.Adam([raw_model.lm_head.weight],         lr=0.002, betas=(0.9, 0.95), fused=True)
params = list(raw_model.transformer.h.parameters())
matrix_params = [p for p in params if p.ndim == 2]
scalar_params = [p for p in params if p.ndim < 2]
optimizer3 = Muon(matrix_params, lr=0.02, momentum=0.95)
optimizer4 = torch.optim.Adam(scalar_params, lr=0.02, betas=(0.9, 0.95), fused=True) # note that this learning rate is neither sensitive nor tuned
optimizers = [optimizer1, optimizer2, optimizer3, optimizer4]
# learning rate decay scheduler (linear warmup and warmdown)
def get_lr(it):
    """Return the multiplier applied to each optimizer's base learning rate at iteration `it`."""
    assert it <= args.num_iterations
    # 1) linear warmup for warmup_iters steps
    if it < args.warmup_iters:
        return (it+1) / args.warmup_iters
    # 2) constant lr for a while
    elif it < args.num_iterations - args.warmdown_iters:
        return 1.0
    # 3) linear warmdown
    else:
        # with no warmdown configured this branch is only reached at it == num_iterations;
        # keep the multiplier constant instead of dividing by zero
        if args.warmdown_iters == 0:
            return 1.0
        decay_ratio = (args.num_iterations - it) / args.warmdown_iters
        return decay_ratio
schedulers = [torch.optim.lr_scheduler.LambdaLR(opt, get_lr) for opt in optimizers]
# Start training loop training_time_ms = 0 # start the clock torch.cuda.synchronize() t0 = time.time() # begin training train_loader.reset() for step in range(args.num_iterations + 1): last_step = (step == args.num_iterations) # This effectively ignores timing first 10 steps, which are slower for weird reasons. # Alternately, and slightly more correctly in terms of benchmarking, we could do 10 # steps with dummy data first, and then re-initialize the model and reset the loader. if step == 10: training_time_ms = 0 t0 = time.time() timed_steps = float('nan') if step <= 11 else (step - 10) + 1 # <= 11 to avoid bug in val # once in a while evaluate the validation dataset if (last_step or (args.val_loss_every > 0 and step % args.val_loss_every == 0)): # stop the clock torch.cuda.synchronize() training_time_ms += 1000 * (time.time() - t0) # run validation batches model.eval() val_loader.reset() val_loss = 0.0 for _ in range(val_steps): with torch.no_grad(): x_val, y_val = val_loader.next_batch() val_loss += model(x_val, y_val) dist.all_reduce(val_loss, op=dist.ReduceOp.AVG) val_loss /= val_steps # log val loss to console and to logfile print0(f'step:{step}/{args.num_iterations} val_loss:{val_loss:.4f} train_time:{training_time_ms:.0f}ms step_avg:{training_time_ms/(timed_steps-1):.2f}ms') # start the clock again torch.cuda.synchronize() t0 = time.time() if master_process and (last_step or (args.save_every > 0 and step % args.save_every == 0)): # stop the clock torch.cuda.synchronize() training_time_ms += 1000 * (time.time() - t0) # save the state of the training process log = dict(step=step, code=code, model=raw_model.state_dict(), optimizers=[opt.state_dict() for opt in optimizers]) torch.save(log, 'logs/%s/state_step%06d.pt' % (run_id, step)) # start the clock again torch.cuda.synchronize() t0 = time.time() # bit confusing: we want to make sure to eval on 0th iteration # but also after the very last iteration. 
so we loop for step <= num_iterations # instead of just < num_iterations (one extra due to <=), only to do # the validation/sampling one last time, and then we break right here as we're done. if last_step: break # --------------- TRAINING SECTION BEGIN ----------------- model.train() for i in range(1, train_accumulation_steps+1): # forward pass loss = model(x, y) train_loss = loss.detach() # advance the dataset for the next batch x, y = train_loader.next_batch() # backward pass if i < train_accumulation_steps: with model.no_sync(): # there's no need to sync gradients every accumulation step loss.backward() else: loss.backward() # just sync on the last step for p in model.parameters(): p.grad /= train_accumulation_steps # momentum warmup for Muon frac = min(step/500, 1) optimizer3.param_groups[0]['momentum'] = (1 - frac) * 0.85 + frac * 0.95 # step the optimizers and schedulers for opt, sched in zip(optimizers, schedulers): opt.step() sched.step() # null the gradients model.zero_grad(set_to_none=True) # --------------- TRAINING SECTION END ------------------- # everything that follows now is just diagnostics, prints, logging, etc. 
#dist.all_reduce(train_loss, op=dist.ReduceOp.AVG) # all-reducing the training loss would be more correct in terms of logging, but slower approx_time = training_time_ms + 1000 * (time.time() - t0) print0(f"step:{step+1}/{args.num_iterations} train_loss:{train_loss.item():.4f} train_time:{approx_time:.0f}ms step_avg:{approx_time/timed_steps:.2f}ms") if master_process: print(f"peak memory consumption: {torch.cuda.max_memory_allocated() // 1024 // 1024} MiB") # ------------------------------------------------------------------------- # clean up nice dist.destroy_process_group() ==================================================================================================== Running pytorch 2.5.1+cu124 compiled for CUDA 12.4 nvidia-smi: Wed Nov 13 23:53:09 2024 +-----------------------------------------------------------------------------------------+ | NVIDIA-SMI 550.90.12 Driver Version: 550.90.12 CUDA Version: 12.6 | |-----------------------------------------+------------------------+----------------------+ | GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC | | Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. | | | | MIG M. 
| |=========================================+========================+======================| | 0 NVIDIA H100 80GB HBM3 On | 00000000:61:00.0 Off | 0 | | N/A 45C P0 73W / 700W | 4MiB / 81559MiB | 0% Default | | | | Disabled | +-----------------------------------------+------------------------+----------------------+ | 1 NVIDIA H100 80GB HBM3 On | 00000000:62:00.0 Off | 0 | | N/A 35C P0 72W / 700W | 4MiB / 81559MiB | 0% Default | | | | Disabled | +-----------------------------------------+------------------------+----------------------+ | 2 NVIDIA H100 80GB HBM3 On | 00000000:63:00.0 Off | 0 | | N/A 33C P0 70W / 700W | 4MiB / 81559MiB | 0% Default | | | | Disabled | +-----------------------------------------+------------------------+----------------------+ | 3 NVIDIA H100 80GB HBM3 On | 00000000:64:00.0 Off | 0 | | N/A 44C P0 74W / 700W | 4MiB / 81559MiB | 0% Default | | | | Disabled | +-----------------------------------------+------------------------+----------------------+ | 4 NVIDIA H100 80GB HBM3 On | 00000000:6A:00.0 Off | 0 | | N/A 46C P0 74W / 700W | 4MiB / 81559MiB | 0% Default | | | | Disabled | +-----------------------------------------+------------------------+----------------------+ | 5 NVIDIA H100 80GB HBM3 On | 00000000:6B:00.0 Off | 0 | | N/A 34C P0 70W / 700W | 4MiB / 81559MiB | 0% Default | | | | Disabled | +-----------------------------------------+------------------------+----------------------+ | 6 NVIDIA H100 80GB HBM3 On | 00000000:6C:00.0 Off | 0 | | N/A 46C P0 96W / 700W | 22MiB / 81559MiB | 0% Default | | | | Disabled | +-----------------------------------------+------------------------+----------------------+ | 7 NVIDIA H100 80GB HBM3 On | 00000000:6D:00.0 Off | 0 | | N/A 33C P0 69W / 700W | 4MiB / 81559MiB | 0% Default | | | | Disabled | +-----------------------------------------+------------------------+----------------------+ +-----------------------------------------------------------------------------------------+ | Processes: | | GPU 
GI CI PID Type Process name GPU Memory | | ID ID Usage | |=========================================================================================| +-----------------------------------------------------------------------------------------+ ==================================================================================================== Training DataLoader: total number of tokens: 10255324043 across 103 files Validation DataLoader: total number of tokens: 100000000 across 1 files ==================================================================================================== step:0/3242 val_loss:10.8259 train_time:250ms step_avg:nanms step:1/3242 train_loss:10.8259 train_time:8410ms step_avg:nanms step:2/3242 train_loss:10.4946 train_time:8768ms step_avg:nanms step:3/3242 train_loss:9.8800 train_time:9155ms step_avg:nanms step:4/3242 train_loss:9.0320 train_time:9541ms step_avg:nanms step:5/3242 train_loss:8.2343 train_time:9928ms step_avg:nanms step:6/3242 train_loss:7.5355 train_time:10313ms step_avg:nanms step:7/3242 train_loss:7.3648 train_time:10699ms step_avg:nanms step:8/3242 train_loss:7.3750 train_time:11086ms step_avg:nanms step:9/3242 train_loss:7.0363 train_time:11472ms step_avg:nanms step:10/3242 train_loss:7.1248 train_time:11858ms step_avg:nanms step:11/3242 train_loss:6.7939 train_time:361ms step_avg:nanms step:12/3242 train_loss:6.8309 train_time:746ms step_avg:nanms step:13/3242 train_loss:6.6004 train_time:1132ms step_avg:377.33ms step:14/3242 train_loss:6.5081 train_time:1519ms step_avg:379.72ms step:15/3242 train_loss:6.4761 train_time:1906ms step_avg:381.15ms step:16/3242 train_loss:6.6436 train_time:2293ms step_avg:382.13ms step:17/3242 train_loss:6.6865 train_time:2680ms step_avg:382.82ms step:18/3242 train_loss:6.5253 train_time:3067ms step_avg:383.35ms step:19/3242 train_loss:6.5531 train_time:3454ms step_avg:383.73ms step:20/3242 train_loss:6.2810 train_time:3840ms step_avg:384.04ms step:21/3242 train_loss:6.3547 train_time:6658ms 
step_avg:605.26ms step:22/3242 train_loss:6.9627 train_time:7046ms step_avg:587.13ms step:23/3242 train_loss:6.2797 train_time:7434ms step_avg:571.82ms step:24/3242 train_loss:6.7594 train_time:7823ms step_avg:558.75ms step:25/3242 train_loss:6.1846 train_time:8211ms step_avg:547.42ms step:26/3242 train_loss:6.0579 train_time:8601ms step_avg:537.55ms step:27/3242 train_loss:6.5825 train_time:8990ms step_avg:528.84ms step:28/3242 train_loss:5.7910 train_time:9379ms step_avg:521.07ms step:29/3242 train_loss:6.1633 train_time:9768ms step_avg:514.10ms step:30/3242 train_loss:6.1731 train_time:10158ms step_avg:507.91ms step:31/3242 train_loss:5.8767 train_time:10548ms step_avg:502.28ms step:32/3242 train_loss:6.0172 train_time:10937ms step_avg:497.11ms step:33/3242 train_loss:6.2359 train_time:11327ms step_avg:492.47ms step:34/3242 train_loss:6.2113 train_time:11717ms step_avg:488.23ms step:35/3242 train_loss:6.2317 train_time:12108ms step_avg:484.31ms step:36/3242 train_loss:6.1062 train_time:12498ms step_avg:480.68ms step:37/3242 train_loss:6.1579 train_time:12888ms step_avg:477.34ms step:38/3242 train_loss:5.8813 train_time:13278ms step_avg:474.23ms step:39/3242 train_loss:5.9924 train_time:13669ms step_avg:471.33ms step:40/3242 train_loss:5.8693 train_time:14058ms step_avg:468.59ms step:41/3242 train_loss:5.9627 train_time:14447ms step_avg:466.04ms step:42/3242 train_loss:5.5768 train_time:14837ms step_avg:463.66ms step:43/3242 train_loss:5.9985 train_time:15228ms step_avg:461.47ms step:44/3242 train_loss:5.9084 train_time:15619ms step_avg:459.37ms step:45/3242 train_loss:5.9273 train_time:16010ms step_avg:457.42ms step:46/3242 train_loss:5.8828 train_time:16401ms step_avg:455.58ms step:47/3242 train_loss:5.7141 train_time:16792ms step_avg:453.85ms step:48/3242 train_loss:5.8696 train_time:17185ms step_avg:452.23ms step:49/3242 train_loss:5.6505 train_time:17576ms step_avg:450.67ms step:50/3242 train_loss:5.8674 train_time:17967ms step_avg:449.18ms step:51/3242 
train_loss:5.8406 train_time:18357ms step_avg:447.74ms step:52/3242 train_loss:5.7166 train_time:18748ms step_avg:446.38ms step:53/3242 train_loss:5.7376 train_time:19140ms step_avg:445.11ms step:54/3242 train_loss:5.6817 train_time:19531ms step_avg:443.89ms step:55/3242 train_loss:5.9785 train_time:19921ms step_avg:442.70ms step:56/3242 train_loss:5.7853 train_time:20314ms step_avg:441.61ms step:57/3242 train_loss:5.5351 train_time:20707ms step_avg:440.58ms step:58/3242 train_loss:5.7850 train_time:21098ms step_avg:439.55ms step:59/3242 train_loss:5.6448 train_time:21489ms step_avg:438.56ms step:60/3242 train_loss:5.8609 train_time:21881ms step_avg:437.61ms step:61/3242 train_loss:5.4983 train_time:22274ms step_avg:436.75ms step:62/3242 train_loss:5.6967 train_time:22666ms step_avg:435.88ms step:63/3242 train_loss:5.6496 train_time:23058ms step_avg:435.05ms step:64/3242 train_loss:5.3931 train_time:23449ms step_avg:434.24ms step:65/3242 train_loss:5.5502 train_time:23840ms step_avg:433.46ms step:66/3242 train_loss:5.6782 train_time:24232ms step_avg:432.71ms step:67/3242 train_loss:5.4591 train_time:24623ms step_avg:431.98ms step:68/3242 train_loss:5.8365 train_time:25015ms step_avg:431.30ms step:69/3242 train_loss:5.3021 train_time:25407ms step_avg:430.63ms step:70/3242 train_loss:5.4220 train_time:25799ms step_avg:429.98ms step:71/3242 train_loss:5.4005 train_time:26191ms step_avg:429.35ms step:72/3242 train_loss:5.5457 train_time:26583ms step_avg:428.76ms step:73/3242 train_loss:5.2983 train_time:26976ms step_avg:428.20ms step:74/3242 train_loss:5.7324 train_time:27368ms step_avg:427.62ms step:75/3242 train_loss:5.4779 train_time:27759ms step_avg:427.06ms step:76/3242 train_loss:5.4683 train_time:28151ms step_avg:426.52ms step:77/3242 train_loss:5.5620 train_time:28541ms step_avg:425.99ms step:78/3242 train_loss:5.1698 train_time:28933ms step_avg:425.48ms step:79/3242 train_loss:5.4524 train_time:29325ms step_avg:425.00ms step:80/3242 train_loss:5.4200 
train_time:29717ms step_avg:424.52ms step:81/3242 train_loss:5.2581 train_time:30108ms step_avg:424.05ms step:82/3242 train_loss:5.3323 train_time:30499ms step_avg:423.60ms step:83/3242 train_loss:5.3236 train_time:30892ms step_avg:423.17ms step:84/3242 train_loss:5.1759 train_time:31284ms step_avg:422.76ms step:85/3242 train_loss:5.2114 train_time:31676ms step_avg:422.35ms step:86/3242 train_loss:5.4320 train_time:32068ms step_avg:421.95ms step:87/3242 train_loss:5.2672 train_time:32459ms step_avg:421.55ms step:88/3242 train_loss:5.3743 train_time:32850ms step_avg:421.15ms step:89/3242 train_loss:5.4501 train_time:33244ms step_avg:420.81ms step:90/3242 train_loss:5.1884 train_time:33636ms step_avg:420.45ms step:91/3242 train_loss:5.3611 train_time:34027ms step_avg:420.09ms step:92/3242 train_loss:5.4096 train_time:34418ms step_avg:419.73ms step:93/3242 train_loss:5.1394 train_time:34809ms step_avg:419.38ms step:94/3242 train_loss:5.1739 train_time:35200ms step_avg:419.05ms step:95/3242 train_loss:5.1781 train_time:35591ms step_avg:418.72ms step:96/3242 train_loss:5.1719 train_time:35983ms step_avg:418.41ms step:97/3242 train_loss:5.1978 train_time:36375ms step_avg:418.10ms step:98/3242 train_loss:5.0924 train_time:36767ms step_avg:417.81ms step:99/3242 train_loss:5.1335 train_time:37159ms step_avg:417.52ms step:100/3242 train_loss:5.1234 train_time:37551ms step_avg:417.23ms step:101/3242 train_loss:5.2332 train_time:37942ms step_avg:416.94ms step:102/3242 train_loss:5.1261 train_time:38334ms step_avg:416.68ms step:103/3242 train_loss:4.9999 train_time:38726ms step_avg:416.41ms step:104/3242 train_loss:4.9080 train_time:39117ms step_avg:416.14ms step:105/3242 train_loss:5.1007 train_time:39508ms step_avg:415.87ms step:106/3242 train_loss:4.8909 train_time:39899ms step_avg:415.61ms step:107/3242 train_loss:5.0263 train_time:40290ms step_avg:415.36ms step:108/3242 train_loss:4.9497 train_time:40682ms step_avg:415.12ms step:109/3242 train_loss:5.0690 
train_time:41075ms step_avg:414.90ms step:110/3242 train_loss:4.9502 train_time:41467ms step_avg:414.67ms step:111/3242 train_loss:5.0101 train_time:41858ms step_avg:414.43ms step:112/3242 train_loss:5.1365 train_time:42249ms step_avg:414.21ms step:113/3242 train_loss:4.9966 train_time:42640ms step_avg:413.98ms step:114/3242 train_loss:4.9115 train_time:43219ms step_avg:415.56ms step:115/3242 train_loss:5.0646 train_time:43617ms step_avg:415.40ms step:116/3242 train_loss:4.9884 train_time:44008ms step_avg:415.17ms step:117/3242 train_loss:4.8091 train_time:44400ms step_avg:414.95ms step:118/3242 train_loss:5.0470 train_time:44792ms step_avg:414.74ms step:119/3242 train_loss:4.9184 train_time:45189ms step_avg:414.58ms step:120/3242 train_loss:4.9164 train_time:45578ms step_avg:414.35ms step:121/3242 train_loss:4.6892 train_time:45969ms step_avg:414.14ms step:122/3242 train_loss:4.9529 train_time:46360ms step_avg:413.93ms step:123/3242 train_loss:4.8358 train_time:46750ms step_avg:413.72ms step:124/3242 train_loss:5.1736 train_time:47141ms step_avg:413.52ms step:125/3242 train_loss:4.9735 train_time:47532ms step_avg:413.32ms step:125/3242 val_loss:4.8703 train_time:47558ms step_avg:413.55ms step:126/3242 train_loss:4.9371 train_time:47936ms step_avg:413.24ms step:127/3242 train_loss:4.8998 train_time:48340ms step_avg:413.16ms step:128/3242 train_loss:4.9058 train_time:48744ms step_avg:413.09ms step:129/3242 train_loss:5.3677 train_time:49148ms step_avg:413.01ms step:130/3242 train_loss:4.9083 train_time:49550ms step_avg:412.92ms step:131/3242 train_loss:4.6640 train_time:49954ms step_avg:412.84ms step:132/3242 train_loss:4.6935 train_time:50357ms step_avg:412.76ms step:133/3242 train_loss:4.8178 train_time:50763ms step_avg:412.70ms step:134/3242 train_loss:4.7693 train_time:51167ms step_avg:412.64ms step:135/3242 train_loss:4.7932 train_time:51572ms step_avg:412.58ms step:136/3242 train_loss:4.6375 train_time:51976ms step_avg:412.51ms step:137/3242 train_loss:4.7935 
train_time:52379ms step_avg:412.44ms step:138/3242 train_loss:4.7632 train_time:52787ms step_avg:412.39ms step:139/3242 train_loss:4.8063 train_time:53190ms step_avg:412.32ms step:140/3242 train_loss:5.1009 train_time:53593ms step_avg:412.25ms step:141/3242 train_loss:4.8137 train_time:53996ms step_avg:412.18ms step:142/3242 train_loss:4.7740 train_time:54398ms step_avg:412.11ms step:143/3242 train_loss:4.8214 train_time:54801ms step_avg:412.04ms step:144/3242 train_loss:4.7484 train_time:55205ms step_avg:411.98ms step:145/3242 train_loss:4.6400 train_time:55608ms step_avg:411.91ms step:146/3242 train_loss:4.5507 train_time:56016ms step_avg:411.88ms step:147/3242 train_loss:4.7042 train_time:56419ms step_avg:411.82ms step:148/3242 train_loss:4.7342 train_time:56820ms step_avg:411.74ms step:149/3242 train_loss:4.9814 train_time:57222ms step_avg:411.67ms step:150/3242 train_loss:4.7733 train_time:57624ms step_avg:411.60ms step:151/3242 train_loss:4.6602 train_time:58026ms step_avg:411.53ms step:152/3242 train_loss:4.6839 train_time:58428ms step_avg:411.46ms step:153/3242 train_loss:4.7955 train_time:58829ms step_avg:411.39ms step:154/3242 train_loss:4.9081 train_time:59231ms step_avg:411.33ms step:155/3242 train_loss:4.5998 train_time:59633ms step_avg:411.26ms step:156/3242 train_loss:4.7738 train_time:60212ms step_avg:412.41ms step:157/3242 train_loss:4.8098 train_time:60622ms step_avg:412.39ms step:158/3242 train_loss:4.6153 train_time:61024ms step_avg:412.33ms step:159/3242 train_loss:4.6968 train_time:61426ms step_avg:412.25ms step:160/3242 train_loss:4.5556 train_time:61829ms step_avg:412.19ms step:161/3242 train_loss:4.6992 train_time:62232ms step_avg:412.14ms step:162/3242 train_loss:4.7464 train_time:62634ms step_avg:412.07ms step:163/3242 train_loss:4.6412 train_time:63038ms step_avg:412.01ms step:164/3242 train_loss:4.5042 train_time:63442ms step_avg:411.96ms step:165/3242 train_loss:4.5724 train_time:63844ms step_avg:411.90ms step:166/3242 
train_loss:4.7600 train_time:64423ms step_avg:412.97ms step:167/3242 train_loss:4.3898 train_time:64833ms step_avg:412.95ms step:168/3242 train_loss:4.8138 train_time:65236ms step_avg:412.88ms step:169/3242 train_loss:4.4356 train_time:65639ms step_avg:412.82ms step:170/3242 train_loss:4.3575 train_time:66042ms step_avg:412.76ms step:171/3242 train_loss:4.5063 train_time:66446ms step_avg:412.71ms step:172/3242 train_loss:4.4998 train_time:66848ms step_avg:412.64ms step:173/3242 train_loss:4.5339 train_time:67250ms step_avg:412.58ms step:174/3242 train_loss:4.6726 train_time:67653ms step_avg:412.52ms step:175/3242 train_loss:4.6327 train_time:68056ms step_avg:412.46ms step:176/3242 train_loss:4.4115 train_time:68458ms step_avg:412.39ms step:177/3242 train_loss:4.3827 train_time:68860ms step_avg:412.34ms step:178/3242 train_loss:4.4201 train_time:69395ms step_avg:413.07ms step:179/3242 train_loss:4.3977 train_time:69804ms step_avg:413.04ms step:180/3242 train_loss:4.4760 train_time:70206ms step_avg:412.97ms step:181/3242 train_loss:4.3973 train_time:70607ms step_avg:412.91ms step:182/3242 train_loss:4.4516 train_time:71014ms step_avg:412.87ms step:183/3242 train_loss:4.4373 train_time:71594ms step_avg:413.84ms step:184/3242 train_loss:4.3834 train_time:71999ms step_avg:413.79ms step:185/3242 train_loss:4.7056 train_time:72401ms step_avg:413.72ms step:186/3242 train_loss:4.5178 train_time:72803ms step_avg:413.65ms step:187/3242 train_loss:4.6992 train_time:73206ms step_avg:413.59ms step:188/3242 train_loss:4.4140 train_time:73608ms step_avg:413.53ms step:189/3242 train_loss:4.3714 train_time:74011ms step_avg:413.47ms step:190/3242 train_loss:4.5532 train_time:74607ms step_avg:414.49ms step:191/3242 train_loss:4.5801 train_time:75214ms step_avg:415.55ms step:192/3242 train_loss:4.3431 train_time:75618ms step_avg:415.48ms step:193/3242 train_loss:4.3792 train_time:76020ms step_avg:415.41ms step:194/3242 train_loss:4.5645 train_time:76423ms step_avg:415.34ms 
step:195/3242 train_loss:4.2837 train_time:76824ms step_avg:415.27ms step:196/3242 train_loss:4.3953 train_time:77227ms step_avg:415.20ms step:197/3242 train_loss:4.3975 train_time:77628ms step_avg:415.12ms step:198/3242 train_loss:4.2053 train_time:78029ms step_avg:415.05ms step:199/3242 train_loss:4.3992 train_time:78430ms step_avg:414.97ms step:200/3242 train_loss:4.3866 train_time:78832ms step_avg:414.91ms step:201/3242 train_loss:5.8504 train_time:79234ms step_avg:414.84ms step:202/3242 train_loss:4.9865 train_time:79636ms step_avg:414.77ms step:203/3242 train_loss:4.2669 train_time:80038ms step_avg:414.70ms step:204/3242 train_loss:4.3156 train_time:80440ms step_avg:414.64ms step:205/3242 train_loss:4.2517 train_time:80842ms step_avg:414.58ms step:206/3242 train_loss:4.3050 train_time:81244ms step_avg:414.51ms step:207/3242 train_loss:4.2697 train_time:81646ms step_avg:414.45ms step:208/3242 train_loss:4.2806 train_time:82047ms step_avg:414.38ms step:209/3242 train_loss:4.4643 train_time:82448ms step_avg:414.31ms step:210/3242 train_loss:4.3369 train_time:82849ms step_avg:414.25ms step:211/3242 train_loss:4.3231 train_time:83250ms step_avg:414.18ms step:212/3242 train_loss:4.3844 train_time:83651ms step_avg:414.11ms step:213/3242 train_loss:4.2843 train_time:84052ms step_avg:414.05ms step:214/3242 train_loss:4.2155 train_time:84453ms step_avg:413.99ms step:215/3242 train_loss:4.3141 train_time:84854ms step_avg:413.92ms step:216/3242 train_loss:4.1723 train_time:85255ms step_avg:413.86ms step:217/3242 train_loss:4.2716 train_time:85657ms step_avg:413.80ms step:218/3242 train_loss:4.2831 train_time:86058ms step_avg:413.74ms step:219/3242 train_loss:4.2579 train_time:86460ms step_avg:413.68ms step:220/3242 train_loss:4.2058 train_time:86860ms step_avg:413.62ms step:221/3242 train_loss:4.2034 train_time:87261ms step_avg:413.56ms step:222/3242 train_loss:4.2586 train_time:87662ms step_avg:413.50ms step:223/3242 train_loss:4.2586 train_time:88064ms 
step_avg:413.45ms step:224/3242 train_loss:4.3700 train_time:88622ms step_avg:414.12ms step:225/3242 train_loss:4.2634 train_time:89022ms step_avg:414.05ms step:226/3242 train_loss:4.2189 train_time:89422ms step_avg:413.99ms step:227/3242 train_loss:4.2720 train_time:89823ms step_avg:413.93ms step:228/3242 train_loss:4.0640 train_time:90224ms step_avg:413.87ms step:229/3242 train_loss:4.3177 train_time:90625ms step_avg:413.81ms step:230/3242 train_loss:4.2972 train_time:91026ms step_avg:413.75ms step:231/3242 train_loss:3.9997 train_time:91427ms step_avg:413.70ms step:232/3242 train_loss:4.1851 train_time:91827ms step_avg:413.64ms step:233/3242 train_loss:4.1693 train_time:92228ms step_avg:413.58ms step:234/3242 train_loss:4.3084 train_time:92628ms step_avg:413.52ms step:235/3242 train_loss:4.3233 train_time:93030ms step_avg:413.47ms step:236/3242 train_loss:4.2933 train_time:93432ms step_avg:413.42ms step:237/3242 train_loss:4.4919 train_time:93835ms step_avg:413.37ms step:238/3242 train_loss:4.2522 train_time:94236ms step_avg:413.32ms step:239/3242 train_loss:4.1764 train_time:94640ms step_avg:413.27ms step:240/3242 train_loss:4.3342 train_time:95042ms step_avg:413.23ms step:241/3242 train_loss:4.4214 train_time:95444ms step_avg:413.18ms step:242/3242 train_loss:4.2409 train_time:95847ms step_avg:413.13ms step:243/3242 train_loss:4.8992 train_time:96248ms step_avg:413.08ms step:244/3242 train_loss:4.2593 train_time:96650ms step_avg:413.04ms step:245/3242 train_loss:4.2431 train_time:97052ms step_avg:412.99ms step:246/3242 train_loss:4.2959 train_time:97455ms step_avg:412.94ms step:247/3242 train_loss:4.1011 train_time:97857ms step_avg:412.90ms step:248/3242 train_loss:4.1552 train_time:98259ms step_avg:412.85ms step:249/3242 train_loss:4.0699 train_time:98662ms step_avg:412.81ms step:250/3242 train_loss:4.3935 train_time:99063ms step_avg:412.76ms step:250/3242 val_loss:4.1830 train_time:99090ms step_avg:412.88ms step:251/3242 train_loss:4.2053 train_time:99465ms 
step_avg:412.72ms step:252/3242 train_loss:4.1429 train_time:99870ms step_avg:412.69ms step:253/3242 train_loss:4.0684 train_time:100273ms step_avg:412.65ms step:254/3242 train_loss:4.1376 train_time:100675ms step_avg:412.60ms step:255/3242 train_loss:4.1527 train_time:101252ms step_avg:413.28ms step:256/3242 train_loss:4.0894 train_time:101653ms step_avg:413.22ms step:257/3242 train_loss:4.0734 train_time:102054ms step_avg:413.17ms step:258/3242 train_loss:4.2291 train_time:102454ms step_avg:413.12ms step:259/3242 train_loss:4.0748 train_time:102855ms step_avg:413.07ms step:260/3242 train_loss:4.0523 train_time:103258ms step_avg:413.03ms step:261/3242 train_loss:4.0504 train_time:103660ms step_avg:412.99ms step:262/3242 train_loss:4.0663 train_time:104060ms step_avg:412.94ms step:263/3242 train_loss:4.3209 train_time:104462ms step_avg:412.89ms step:264/3242 train_loss:4.2074 train_time:104863ms step_avg:412.85ms step:265/3242 train_loss:4.9189 train_time:105265ms step_avg:412.80ms step:266/3242 train_loss:4.1210 train_time:105667ms step_avg:412.76ms step:267/3242 train_loss:4.6178 train_time:106067ms step_avg:412.71ms step:268/3242 train_loss:4.0516 train_time:106468ms step_avg:412.67ms step:269/3242 train_loss:3.9511 train_time:106869ms step_avg:412.62ms step:270/3242 train_loss:4.1998 train_time:107270ms step_avg:412.58ms step:271/3242 train_loss:4.1472 train_time:107671ms step_avg:412.53ms step:272/3242 train_loss:4.0930 train_time:108073ms step_avg:412.49ms step:273/3242 train_loss:4.0678 train_time:108475ms step_avg:412.45ms step:274/3242 train_loss:4.1273 train_time:108876ms step_avg:412.41ms step:275/3242 train_loss:4.0399 train_time:109278ms step_avg:412.37ms step:276/3242 train_loss:4.2820 train_time:109679ms step_avg:412.33ms step:277/3242 train_loss:4.2575 train_time:110080ms step_avg:412.28ms step:278/3242 train_loss:3.9893 train_time:110481ms step_avg:412.24ms step:279/3242 train_loss:4.2927 train_time:110883ms step_avg:412.20ms step:280/3242 
train_loss:4.2273 train_time:111283ms step_avg:412.16ms step:281/3242 train_loss:3.9996 train_time:111684ms step_avg:412.12ms step:282/3242 train_loss:4.2060 train_time:112084ms step_avg:412.07ms step:283/3242 train_loss:4.5875 train_time:112483ms step_avg:412.03ms step:284/3242 train_loss:4.0641 train_time:112884ms step_avg:411.99ms step:285/3242 train_loss:4.0141 train_time:113284ms step_avg:411.94ms step:286/3242 train_loss:4.3448 train_time:113684ms step_avg:411.90ms step:287/3242 train_loss:4.0943 train_time:114085ms step_avg:411.86ms step:288/3242 train_loss:4.0275 train_time:114486ms step_avg:411.82ms step:289/3242 train_loss:4.1513 train_time:114886ms step_avg:411.78ms step:290/3242 train_loss:4.1888 train_time:115287ms step_avg:411.74ms step:291/3242 train_loss:4.0611 train_time:115688ms step_avg:411.70ms step:292/3242 train_loss:4.0299 train_time:116088ms step_avg:411.66ms step:293/3242 train_loss:4.1574 train_time:116488ms step_avg:411.62ms step:294/3242 train_loss:4.0712 train_time:116889ms step_avg:411.58ms step:295/3242 train_loss:4.0925 train_time:117291ms step_avg:411.55ms step:296/3242 train_loss:4.1249 train_time:117691ms step_avg:411.51ms step:297/3242 train_loss:4.0862 train_time:118095ms step_avg:411.48ms step:298/3242 train_loss:4.0320 train_time:118496ms step_avg:411.44ms step:299/3242 train_loss:4.0179 train_time:118898ms step_avg:411.41ms step:300/3242 train_loss:4.2680 train_time:119298ms step_avg:411.37ms step:301/3242 train_loss:4.0677 train_time:119699ms step_avg:411.34ms step:302/3242 train_loss:4.1753 train_time:120100ms step_avg:411.30ms step:303/3242 train_loss:4.0388 train_time:120500ms step_avg:411.26ms step:304/3242 train_loss:4.0024 train_time:120901ms step_avg:411.23ms step:305/3242 train_loss:4.0888 train_time:121301ms step_avg:411.19ms step:306/3242 train_loss:4.0066 train_time:121704ms step_avg:411.16ms step:307/3242 train_loss:4.1728 train_time:122103ms step_avg:411.12ms step:308/3242 train_loss:3.8976 train_time:122504ms 
step_avg:411.09ms step:309/3242 train_loss:4.2970 train_time:122906ms step_avg:411.06ms step:310/3242 train_loss:3.9918 train_time:123306ms step_avg:411.02ms step:311/3242 train_loss:4.2336 train_time:123708ms step_avg:410.99ms step:312/3242 train_loss:4.0939 train_time:124107ms step_avg:410.95ms step:313/3242 train_loss:4.0675 train_time:124508ms step_avg:410.92ms step:314/3242 train_loss:3.9865 train_time:124908ms step_avg:410.88ms step:315/3242 train_loss:4.0408 train_time:125309ms step_avg:410.85ms step:316/3242 train_loss:3.7849 train_time:125708ms step_avg:410.81ms step:317/3242 train_loss:4.0384 train_time:126108ms step_avg:410.78ms step:318/3242 train_loss:4.0706 train_time:126511ms step_avg:410.75ms step:319/3242 train_loss:3.9513 train_time:126911ms step_avg:410.72ms step:320/3242 train_loss:3.9866 train_time:127312ms step_avg:410.68ms step:321/3242 train_loss:3.9619 train_time:127713ms step_avg:410.65ms step:322/3242 train_loss:4.2091 train_time:128112ms step_avg:410.62ms step:323/3242 train_loss:3.9776 train_time:128512ms step_avg:410.58ms step:324/3242 train_loss:3.9884 train_time:128913ms step_avg:410.55ms step:325/3242 train_loss:3.9366 train_time:129314ms step_avg:410.52ms step:326/3242 train_loss:4.0677 train_time:129714ms step_avg:410.49ms step:327/3242 train_loss:3.9664 train_time:130115ms step_avg:410.46ms step:328/3242 train_loss:4.1693 train_time:130517ms step_avg:410.43ms step:329/3242 train_loss:4.0392 train_time:130917ms step_avg:410.40ms step:330/3242 train_loss:4.1396 train_time:131318ms step_avg:410.37ms step:331/3242 train_loss:4.0324 train_time:131720ms step_avg:410.34ms step:332/3242 train_loss:4.5008 train_time:132122ms step_avg:410.32ms step:333/3242 train_loss:3.9800 train_time:132524ms step_avg:410.29ms step:334/3242 train_loss:3.8860 train_time:132925ms step_avg:410.26ms step:335/3242 train_loss:3.9564 train_time:133326ms step_avg:410.23ms step:336/3242 train_loss:4.1345 train_time:133727ms step_avg:410.21ms step:337/3242 
train_loss:4.0317 train_time:134128ms step_avg:410.18ms step:338/3242 train_loss:4.0645 train_time:134528ms step_avg:410.15ms step:339/3242 train_loss:4.0199 train_time:134930ms step_avg:410.12ms step:340/3242 train_loss:3.9801 train_time:135331ms step_avg:410.09ms step:341/3242 train_loss:3.8493 train_time:135731ms step_avg:410.06ms step:342/3242 train_loss:3.9256 train_time:136131ms step_avg:410.03ms step:343/3242 train_loss:4.0546 train_time:136532ms step_avg:410.01ms step:344/3242 train_loss:3.9511 train_time:136933ms step_avg:409.98ms step:345/3242 train_loss:4.0253 train_time:137333ms step_avg:409.95ms step:346/3242 train_loss:3.9971 train_time:137736ms step_avg:409.93ms step:347/3242 train_loss:4.0143 train_time:138137ms step_avg:409.90ms step:348/3242 train_loss:3.9037 train_time:138538ms step_avg:409.87ms step:349/3242 train_loss:3.9177 train_time:138937ms step_avg:409.84ms step:350/3242 train_loss:3.9915 train_time:139339ms step_avg:409.82ms step:351/3242 train_loss:4.0385 train_time:139741ms step_avg:409.80ms step:352/3242 train_loss:3.9142 train_time:140142ms step_avg:409.77ms step:353/3242 train_loss:3.9303 train_time:140543ms step_avg:409.75ms step:354/3242 train_loss:3.9505 train_time:140945ms step_avg:409.72ms step:355/3242 train_loss:3.7899 train_time:141346ms step_avg:409.70ms step:356/3242 train_loss:4.0385 train_time:141747ms step_avg:409.67ms step:357/3242 train_loss:3.8649 train_time:142147ms step_avg:409.65ms step:358/3242 train_loss:3.9522 train_time:142547ms step_avg:409.62ms step:359/3242 train_loss:3.8364 train_time:142947ms step_avg:409.59ms step:360/3242 train_loss:3.7793 train_time:143349ms step_avg:409.57ms step:361/3242 train_loss:4.4336 train_time:143750ms step_avg:409.54ms step:362/3242 train_loss:4.5119 train_time:144151ms step_avg:409.52ms step:363/3242 train_loss:3.9280 train_time:144552ms step_avg:409.50ms step:364/3242 train_loss:3.7654 train_time:144953ms step_avg:409.47ms step:365/3242 train_loss:3.9109 train_time:145354ms 
step_avg:409.45ms step:366/3242 train_loss:3.7493 train_time:145755ms step_avg:409.42ms step:367/3242 train_loss:4.0992 train_time:146156ms step_avg:409.40ms step:368/3242 train_loss:3.9740 train_time:146557ms step_avg:409.38ms step:369/3242 train_loss:4.0926 train_time:146958ms step_avg:409.35ms step:370/3242 train_loss:4.0502 train_time:147357ms step_avg:409.32ms step:371/3242 train_loss:3.7236 train_time:147758ms step_avg:409.30ms step:372/3242 train_loss:4.0003 train_time:148158ms step_avg:409.28ms step:373/3242 train_loss:4.0163 train_time:148558ms step_avg:409.25ms step:374/3242 train_loss:3.8408 train_time:148959ms step_avg:409.23ms step:375/3242 train_loss:3.9933 train_time:149358ms step_avg:409.20ms step:375/3242 val_loss:3.9493 train_time:149386ms step_avg:409.28ms step:376/3242 train_loss:3.8926 train_time:149759ms step_avg:409.18ms step:377/3242 train_loss:3.7899 train_time:150160ms step_avg:409.16ms step:378/3242 train_loss:3.5056 train_time:150560ms step_avg:409.13ms step:379/3242 train_loss:3.7755 train_time:150961ms step_avg:409.11ms step:380/3242 train_loss:3.8842 train_time:151557ms step_avg:409.61ms step:381/3242 train_loss:3.9924 train_time:152159ms step_avg:410.13ms step:382/3242 train_loss:4.0085 train_time:152558ms step_avg:410.10ms step:383/3242 train_loss:3.8815 train_time:152959ms step_avg:410.08ms step:384/3242 train_loss:3.8802 train_time:153360ms step_avg:410.05ms step:385/3242 train_loss:3.8931 train_time:153760ms step_avg:410.03ms step:386/3242 train_loss:3.8620 train_time:154162ms step_avg:410.00ms step:387/3242 train_loss:4.0640 train_time:154563ms step_avg:409.98ms step:388/3242 train_loss:4.2942 train_time:154966ms step_avg:409.96ms step:389/3242 train_loss:4.0196 train_time:155367ms step_avg:409.94ms step:390/3242 train_loss:3.8512 train_time:155767ms step_avg:409.91ms step:391/3242 train_loss:3.9345 train_time:156168ms step_avg:409.89ms step:392/3242 train_loss:3.9513 train_time:156568ms step_avg:409.86ms step:393/3242 
train_loss:4.1170 train_time:156968ms step_avg:409.84ms step:394/3242 train_loss:3.9033 train_time:157370ms step_avg:409.82ms step:395/3242 train_loss:3.8698 train_time:157769ms step_avg:409.79ms step:396/3242 train_loss:3.6892 train_time:158169ms step_avg:409.76ms step:397/3242 train_loss:4.0291 train_time:158569ms step_avg:409.74ms step:398/3242 train_loss:4.0204 train_time:158969ms step_avg:409.71ms step:399/3242 train_loss:4.0566 train_time:159370ms step_avg:409.69ms step:400/3242 train_loss:3.9245 train_time:159770ms step_avg:409.67ms step:401/3242 train_loss:4.0472 train_time:160170ms step_avg:409.64ms step:402/3242 train_loss:3.9625 train_time:160570ms step_avg:409.62ms step:403/3242 train_loss:4.0767 train_time:160969ms step_avg:409.59ms step:404/3242 train_loss:4.1344 train_time:161369ms step_avg:409.57ms step:405/3242 train_loss:3.9476 train_time:161769ms step_avg:409.54ms step:406/3242 train_loss:3.9354 train_time:162169ms step_avg:409.52ms step:407/3242 train_loss:4.1530 train_time:162569ms step_avg:409.49ms step:408/3242 train_loss:3.9409 train_time:162969ms step_avg:409.47ms step:409/3242 train_loss:3.8978 train_time:163369ms step_avg:409.45ms step:410/3242 train_loss:4.0428 train_time:163769ms step_avg:409.42ms step:411/3242 train_loss:3.9029 train_time:164173ms step_avg:409.41ms step:412/3242 train_loss:3.8631 train_time:164574ms step_avg:409.39ms step:413/3242 train_loss:4.4258 train_time:164976ms step_avg:409.37ms step:414/3242 train_loss:3.8819 train_time:165376ms step_avg:409.35ms step:415/3242 train_loss:4.0491 train_time:165776ms step_avg:409.32ms step:416/3242 train_loss:3.8500 train_time:166177ms step_avg:409.30ms step:417/3242 train_loss:3.8930 train_time:166577ms step_avg:409.28ms step:418/3242 train_loss:4.0982 train_time:166978ms step_avg:409.26ms step:419/3242 train_loss:3.8057 train_time:167378ms step_avg:409.24ms step:420/3242 train_loss:3.8605 train_time:167779ms step_avg:409.22ms step:421/3242 train_loss:3.6988 train_time:168179ms 
step_avg:409.20ms step:422/3242 train_loss:3.7004 train_time:168579ms step_avg:409.17ms step:423/3242 train_loss:3.9278 train_time:168981ms step_avg:409.16ms step:424/3242 train_loss:3.8692 train_time:169383ms step_avg:409.14ms step:425/3242 train_loss:3.8206 train_time:169784ms step_avg:409.12ms step:426/3242 train_loss:3.7954 train_time:170185ms step_avg:409.10ms step:427/3242 train_loss:3.8160 train_time:170587ms step_avg:409.08ms step:428/3242 train_loss:3.8182 train_time:170989ms step_avg:409.06ms step:429/3242 train_loss:3.8651 train_time:171389ms step_avg:409.04ms step:430/3242 train_loss:3.8910 train_time:171788ms step_avg:409.02ms step:431/3242 train_loss:3.9104 train_time:172190ms step_avg:409.00ms step:432/3242 train_loss:3.7613 train_time:172588ms step_avg:408.98ms step:433/3242 train_loss:3.9010 train_time:172988ms step_avg:408.96ms step:434/3242 train_loss:3.9987 train_time:173389ms step_avg:408.94ms step:435/3242 train_loss:3.8889 train_time:173791ms step_avg:408.92ms step:436/3242 train_loss:3.9170 train_time:174190ms step_avg:408.90ms step:437/3242 train_loss:3.6581 train_time:174590ms step_avg:408.88ms step:438/3242 train_loss:3.7678 train_time:174990ms step_avg:408.85ms step:439/3242 train_loss:3.8482 train_time:175389ms step_avg:408.83ms step:440/3242 train_loss:3.7931 train_time:175787ms step_avg:408.81ms step:441/3242 train_loss:4.1693 train_time:176187ms step_avg:408.79ms step:442/3242 train_loss:3.7753 train_time:176586ms step_avg:408.76ms step:443/3242 train_loss:3.8709 train_time:176987ms step_avg:408.75ms step:444/3242 train_loss:3.7714 train_time:177386ms step_avg:408.72ms step:445/3242 train_loss:3.8323 train_time:177786ms step_avg:408.70ms step:446/3242 train_loss:3.8946 train_time:178186ms step_avg:408.68ms step:447/3242 train_loss:3.8805 train_time:178585ms step_avg:408.66ms step:448/3242 train_loss:3.9498 train_time:178986ms step_avg:408.64ms step:449/3242 train_loss:3.9771 train_time:179386ms step_avg:408.63ms step:450/3242 
train_loss:3.8757 train_time:179786ms step_avg:408.60ms step:451/3242 train_loss:3.8668 train_time:180185ms step_avg:408.58ms step:452/3242 train_loss:3.7945 train_time:180584ms step_avg:408.56ms step:453/3242 train_loss:3.7292 train_time:180984ms step_avg:408.54ms step:454/3242 train_loss:3.7420 train_time:181383ms step_avg:408.52ms step:455/3242 train_loss:3.7338 train_time:181783ms step_avg:408.50ms step:456/3242 train_loss:4.0024 train_time:182183ms step_avg:408.48ms step:457/3242 train_loss:3.8511 train_time:182583ms step_avg:408.46ms step:458/3242 train_loss:3.8697 train_time:182983ms step_avg:408.44ms step:459/3242 train_loss:4.0349 train_time:183384ms step_avg:408.43ms step:460/3242 train_loss:3.6821 train_time:183782ms step_avg:408.40ms step:461/3242 train_loss:3.9599 train_time:184181ms step_avg:408.38ms step:462/3242 train_loss:3.8649 train_time:184581ms step_avg:408.37ms step:463/3242 train_loss:3.9231 train_time:184983ms step_avg:408.35ms step:464/3242 train_loss:3.9224 train_time:185383ms step_avg:408.33ms step:465/3242 train_loss:3.8020 train_time:185784ms step_avg:408.32ms step:466/3242 train_loss:3.8300 train_time:186184ms step_avg:408.30ms step:467/3242 train_loss:3.9884 train_time:186585ms step_avg:408.28ms step:468/3242 train_loss:4.1592 train_time:186986ms step_avg:408.27ms step:469/3242 train_loss:3.7813 train_time:187388ms step_avg:408.25ms step:470/3242 train_loss:3.7702 train_time:187789ms step_avg:408.24ms step:471/3242 train_loss:3.8803 train_time:188189ms step_avg:408.22ms step:472/3242 train_loss:3.8325 train_time:188589ms step_avg:408.20ms step:473/3242 train_loss:3.9209 train_time:188990ms step_avg:408.19ms step:474/3242 train_loss:3.8000 train_time:189389ms step_avg:408.17ms step:475/3242 train_loss:3.6752 train_time:189787ms step_avg:408.14ms step:476/3242 train_loss:3.9692 train_time:190185ms step_avg:408.12ms step:477/3242 train_loss:3.8077 train_time:190584ms step_avg:408.10ms step:478/3242 train_loss:3.8845 train_time:190983ms 
step_avg:408.08ms step:479/3242 train_loss:4.0205 train_time:191382ms step_avg:408.06ms step:480/3242 train_loss:3.8711 train_time:191780ms step_avg:408.04ms step:481/3242 train_loss:3.8134 train_time:192180ms step_avg:408.02ms step:482/3242 train_loss:3.8935 train_time:192580ms step_avg:408.01ms step:483/3242 train_loss:3.6486 train_time:192980ms step_avg:407.99ms step:484/3242 train_loss:3.8633 train_time:193380ms step_avg:407.97ms step:485/3242 train_loss:3.8230 train_time:193779ms step_avg:407.96ms step:486/3242 train_loss:3.8135 train_time:194177ms step_avg:407.94ms step:487/3242 train_loss:3.7429 train_time:194576ms step_avg:407.92ms step:488/3242 train_loss:3.8011 train_time:194975ms step_avg:407.90ms step:489/3242 train_loss:3.9190 train_time:195377ms step_avg:407.89ms step:490/3242 train_loss:3.9206 train_time:195777ms step_avg:407.87ms step:491/3242 train_loss:3.8127 train_time:196177ms step_avg:407.85ms step:492/3242 train_loss:3.7030 train_time:196577ms step_avg:407.84ms step:493/3242 train_loss:4.1148 train_time:196976ms step_avg:407.82ms step:494/3242 train_loss:3.6577 train_time:197375ms step_avg:407.80ms step:495/3242 train_loss:3.8805 train_time:197774ms step_avg:407.78ms step:496/3242 train_loss:3.9319 train_time:198172ms step_avg:407.76ms step:497/3242 train_loss:3.6913 train_time:198571ms step_avg:407.74ms step:498/3242 train_loss:3.8065 train_time:198970ms step_avg:407.73ms step:499/3242 train_loss:3.9078 train_time:199369ms step_avg:407.71ms step:500/3242 train_loss:3.9362 train_time:199767ms step_avg:407.69ms step:500/3242 val_loss:3.8311 train_time:199794ms step_avg:407.74ms step:501/3242 train_loss:4.0304 train_time:200167ms step_avg:407.67ms step:502/3242 train_loss:3.7727 train_time:200566ms step_avg:407.65ms step:503/3242 train_loss:3.9547 train_time:200966ms step_avg:407.64ms step:504/3242 train_loss:3.8045 train_time:201364ms step_avg:407.62ms step:505/3242 train_loss:3.8610 train_time:201764ms step_avg:407.60ms step:506/3242 
train_loss:3.8318 train_time:202165ms step_avg:407.59ms step:507/3242 train_loss:3.7113 train_time:202564ms step_avg:407.57ms step:508/3242 train_loss:3.9941 train_time:202963ms step_avg:407.56ms step:509/3242 train_loss:4.1837 train_time:203362ms step_avg:407.54ms step:510/3242 train_loss:3.8167 train_time:203762ms step_avg:407.52ms step:511/3242 train_loss:3.8233 train_time:204161ms step_avg:407.51ms step:512/3242 train_loss:3.7824 train_time:204560ms step_avg:407.49ms step:513/3242 train_loss:3.9077 train_time:204961ms step_avg:407.48ms step:514/3242 train_loss:3.8752 train_time:205360ms step_avg:407.46ms step:515/3242 train_loss:3.8773 train_time:205760ms step_avg:407.45ms step:516/3242 train_loss:3.8255 train_time:206159ms step_avg:407.43ms step:517/3242 train_loss:3.9905 train_time:206559ms step_avg:407.41ms step:518/3242 train_loss:3.6901 train_time:206960ms step_avg:407.40ms step:519/3242 train_loss:3.9687 train_time:207359ms step_avg:407.38ms step:520/3242 train_loss:3.9156 train_time:207758ms step_avg:407.37ms step:521/3242 train_loss:3.8341 train_time:208159ms step_avg:407.36ms step:522/3242 train_loss:3.7206 train_time:208558ms step_avg:407.34ms step:523/3242 train_loss:3.7470 train_time:208958ms step_avg:407.33ms step:524/3242 train_loss:5.1683 train_time:209356ms step_avg:407.31ms step:525/3242 train_loss:3.7633 train_time:209756ms step_avg:407.29ms step:526/3242 train_loss:3.8691 train_time:210154ms step_avg:407.28ms step:527/3242 train_loss:3.7482 train_time:210554ms step_avg:407.26ms step:528/3242 train_loss:3.6315 train_time:210953ms step_avg:407.24ms step:529/3242 train_loss:3.7542 train_time:211351ms step_avg:407.23ms step:530/3242 train_loss:3.9834 train_time:211750ms step_avg:407.21ms step:531/3242 train_loss:3.7406 train_time:212148ms step_avg:407.19ms step:532/3242 train_loss:3.9542 train_time:212545ms step_avg:407.18ms step:533/3242 train_loss:3.8794 train_time:212945ms step_avg:407.16ms step:534/3242 train_loss:3.7752 train_time:213344ms 
step_avg:407.14ms step:535/3242 train_loss:3.7316 train_time:213743ms step_avg:407.13ms step:536/3242 train_loss:3.6929 train_time:214144ms step_avg:407.12ms step:537/3242 train_loss:3.8429 train_time:214546ms step_avg:407.11ms step:538/3242 train_loss:3.8423 train_time:214945ms step_avg:407.09ms step:539/3242 train_loss:3.7596 train_time:215344ms step_avg:407.08ms step:540/3242 train_loss:4.3914 train_time:215743ms step_avg:407.06ms step:541/3242 train_loss:3.8086 train_time:216143ms step_avg:407.05ms step:542/3242 train_loss:3.9517 train_time:216542ms step_avg:407.03ms step:543/3242 train_loss:3.7116 train_time:216943ms step_avg:407.02ms step:544/3242 train_loss:3.7702 train_time:217342ms step_avg:407.01ms step:545/3242 train_loss:3.7843 train_time:217741ms step_avg:406.99ms step:546/3242 train_loss:3.6918 train_time:218140ms step_avg:406.98ms step:547/3242 train_loss:3.7662 train_time:218539ms step_avg:406.96ms step:548/3242 train_loss:3.7292 train_time:218938ms step_avg:406.95ms step:549/3242 train_loss:3.8402 train_time:219339ms step_avg:406.94ms step:550/3242 train_loss:3.7806 train_time:219737ms step_avg:406.92ms step:551/3242 train_loss:3.8106 train_time:220137ms step_avg:406.91ms step:552/3242 train_loss:3.7807 train_time:220536ms step_avg:406.89ms step:553/3242 train_loss:3.6837 train_time:220937ms step_avg:406.88ms step:554/3242 train_loss:4.0615 train_time:221336ms step_avg:406.87ms step:555/3242 train_loss:3.8142 train_time:221735ms step_avg:406.85ms step:556/3242 train_loss:3.7871 train_time:222135ms step_avg:406.84ms step:557/3242 train_loss:3.8184 train_time:222533ms step_avg:406.83ms step:558/3242 train_loss:3.3450 train_time:222932ms step_avg:406.81ms step:559/3242 train_loss:3.7619 train_time:223331ms step_avg:406.80ms step:560/3242 train_loss:3.7403 train_time:223729ms step_avg:406.78ms step:561/3242 train_loss:3.9306 train_time:224127ms step_avg:406.76ms step:562/3242 train_loss:3.7351 train_time:224526ms step_avg:406.75ms step:563/3242 
train_loss:3.6093 train_time:224924ms step_avg:406.73ms step:564/3242 train_loss:3.9082 train_time:225322ms step_avg:406.72ms step:565/3242 train_loss:3.6022 train_time:225720ms step_avg:406.70ms step:566/3242 train_loss:3.8046 train_time:226119ms step_avg:406.69ms step:567/3242 train_loss:3.6951 train_time:226518ms step_avg:406.67ms step:568/3242 train_loss:3.7263 train_time:226915ms step_avg:406.66ms step:569/3242 train_loss:3.6794 train_time:227314ms step_avg:406.64ms step:570/3242 train_loss:3.7181 train_time:227905ms step_avg:406.97ms step:571/3242 train_loss:3.7709 train_time:228312ms step_avg:406.97ms step:572/3242 train_loss:3.6030 train_time:228902ms step_avg:407.30ms step:573/3242 train_loss:3.6763 train_time:229303ms step_avg:407.29ms step:574/3242 train_loss:3.6033 train_time:229702ms step_avg:407.27ms step:575/3242 train_loss:4.0772 train_time:230100ms step_avg:407.26ms step:576/3242 train_loss:3.6426 train_time:230500ms step_avg:407.24ms step:577/3242 train_loss:3.6984 train_time:230899ms step_avg:407.23ms step:578/3242 train_loss:3.9739 train_time:231297ms step_avg:407.21ms step:579/3242 train_loss:3.6447 train_time:231696ms step_avg:407.20ms step:580/3242 train_loss:3.7142 train_time:232095ms step_avg:407.18ms step:581/3242 train_loss:3.9229 train_time:232494ms step_avg:407.17ms step:582/3242 train_loss:4.0243 train_time:232891ms step_avg:407.15ms step:583/3242 train_loss:3.9422 train_time:233289ms step_avg:407.14ms step:584/3242 train_loss:3.8689 train_time:233688ms step_avg:407.12ms step:585/3242 train_loss:3.7581 train_time:234087ms step_avg:407.11ms step:586/3242 train_loss:3.7343 train_time:234486ms step_avg:407.09ms step:587/3242 train_loss:3.8335 train_time:234884ms step_avg:407.08ms step:588/3242 train_loss:4.4401 train_time:235286ms step_avg:407.07ms step:589/3242 train_loss:3.6077 train_time:235684ms step_avg:407.05ms step:590/3242 train_loss:3.8941 train_time:236083ms step_avg:407.04ms step:591/3242 train_loss:3.8805 train_time:236483ms 
step_avg:407.03ms step:592/3242 train_loss:3.6403 train_time:236881ms step_avg:407.01ms step:593/3242 train_loss:3.7119 train_time:237280ms step_avg:407.00ms step:594/3242 train_loss:3.9025 train_time:237678ms step_avg:406.98ms step:595/3242 train_loss:3.6322 train_time:238077ms step_avg:406.97ms step:596/3242 train_loss:3.8300 train_time:238477ms step_avg:406.96ms step:597/3242 train_loss:3.8252 train_time:238876ms step_avg:406.94ms step:598/3242 train_loss:3.7779 train_time:239275ms step_avg:406.93ms step:599/3242 train_loss:3.7895 train_time:239674ms step_avg:406.92ms step:600/3242 train_loss:3.8801 train_time:240072ms step_avg:406.90ms step:601/3242 train_loss:3.6712 train_time:240471ms step_avg:406.89ms step:602/3242 train_loss:3.6341 train_time:240870ms step_avg:406.87ms step:603/3242 train_loss:3.8218 train_time:241269ms step_avg:406.86ms step:604/3242 train_loss:3.7423 train_time:241669ms step_avg:406.85ms step:605/3242 train_loss:3.8408 train_time:242068ms step_avg:406.84ms step:606/3242 train_loss:4.3457 train_time:242467ms step_avg:406.82ms step:607/3242 train_loss:4.0544 train_time:242866ms step_avg:406.81ms step:608/3242 train_loss:3.8247 train_time:243265ms step_avg:406.80ms step:609/3242 train_loss:3.7679 train_time:243664ms step_avg:406.78ms step:610/3242 train_loss:3.7278 train_time:244064ms step_avg:406.77ms step:611/3242 train_loss:3.7155 train_time:244463ms step_avg:406.76ms step:612/3242 train_loss:3.6754 train_time:244862ms step_avg:406.75ms step:613/3242 train_loss:3.8414 train_time:245261ms step_avg:406.73ms step:614/3242 train_loss:3.6300 train_time:245659ms step_avg:406.72ms step:615/3242 train_loss:3.7171 train_time:246058ms step_avg:406.71ms step:616/3242 train_loss:3.6306 train_time:246457ms step_avg:406.69ms step:617/3242 train_loss:3.9666 train_time:246856ms step_avg:406.68ms step:618/3242 train_loss:3.6446 train_time:247256ms step_avg:406.67ms step:619/3242 train_loss:3.5917 train_time:247656ms step_avg:406.66ms step:620/3242 
train_loss:3.9514 train_time:248055ms step_avg:406.65ms step:621/3242 train_loss:3.8303 train_time:248454ms step_avg:406.63ms step:622/3242 train_loss:3.7674 train_time:248852ms step_avg:406.62ms step:623/3242 train_loss:3.6962 train_time:249251ms step_avg:406.61ms step:624/3242 train_loss:3.6840 train_time:249648ms step_avg:406.59ms step:625/3242 train_loss:3.8174 train_time:250048ms step_avg:406.58ms step:625/3242 val_loss:3.7432 train_time:250074ms step_avg:406.62ms step:626/3242 train_loss:3.7724 train_time:250446ms step_avg:406.57ms step:627/3242 train_loss:3.7270 train_time:250845ms step_avg:406.56ms step:628/3242 train_loss:3.6400 train_time:251243ms step_avg:406.54ms step:629/3242 train_loss:3.8218 train_time:251642ms step_avg:406.53ms step:630/3242 train_loss:3.6144 train_time:252041ms step_avg:406.52ms step:631/3242 train_loss:3.7576 train_time:252440ms step_avg:406.51ms step:632/3242 train_loss:3.5650 train_time:252838ms step_avg:406.49ms step:633/3242 train_loss:3.7707 train_time:253238ms step_avg:406.48ms step:634/3242 train_loss:3.7529 train_time:253637ms step_avg:406.47ms step:635/3242 train_loss:3.6505 train_time:254035ms step_avg:406.46ms step:636/3242 train_loss:3.5631 train_time:254434ms step_avg:406.44ms step:637/3242 train_loss:3.7908 train_time:254834ms step_avg:406.43ms step:638/3242 train_loss:3.5695 train_time:255233ms step_avg:406.42ms step:639/3242 train_loss:3.6418 train_time:255632ms step_avg:406.41ms step:640/3242 train_loss:3.7351 train_time:256033ms step_avg:406.40ms step:641/3242 train_loss:3.7820 train_time:256432ms step_avg:406.39ms step:642/3242 train_loss:3.6244 train_time:256831ms step_avg:406.38ms step:643/3242 train_loss:4.0431 train_time:257231ms step_avg:406.37ms step:644/3242 train_loss:3.6045 train_time:257630ms step_avg:406.36ms step:645/3242 train_loss:3.8079 train_time:258029ms step_avg:406.34ms step:646/3242 train_loss:3.5914 train_time:258426ms step_avg:406.33ms step:647/3242 train_loss:3.6437 train_time:258827ms 
step_avg:406.32ms step:648/3242 train_loss:3.7198 train_time:259224ms step_avg:406.31ms step:649/3242 train_loss:3.9329 train_time:259623ms step_avg:406.30ms step:650/3242 train_loss:3.7906 train_time:260022ms step_avg:406.28ms step:651/3242 train_loss:3.4076 train_time:260420ms step_avg:406.27ms step:652/3242 train_loss:3.4917 train_time:260819ms step_avg:406.26ms step:653/3242 train_loss:3.8061 train_time:261219ms step_avg:406.25ms step:654/3242 train_loss:3.6244 train_time:261617ms step_avg:406.24ms step:655/3242 train_loss:3.8365 train_time:262014ms step_avg:406.22ms step:656/3242 train_loss:3.8378 train_time:262412ms step_avg:406.21ms step:657/3242 train_loss:3.9493 train_time:262812ms step_avg:406.20ms step:658/3242 train_loss:3.7094 train_time:263211ms step_avg:406.19ms step:659/3242 train_loss:3.6359 train_time:263609ms step_avg:406.18ms step:660/3242 train_loss:3.7407 train_time:264006ms step_avg:406.16ms step:661/3242 train_loss:3.8267 train_time:264406ms step_avg:406.15ms step:662/3242 train_loss:3.8898 train_time:264803ms step_avg:406.14ms step:663/3242 train_loss:3.6739 train_time:265202ms step_avg:406.13ms step:664/3242 train_loss:3.7237 train_time:265601ms step_avg:406.12ms step:665/3242 train_loss:3.7026 train_time:266000ms step_avg:406.11ms step:666/3242 train_loss:3.6163 train_time:266398ms step_avg:406.09ms step:667/3242 train_loss:3.7692 train_time:266796ms step_avg:406.08ms step:668/3242 train_loss:3.8402 train_time:267197ms step_avg:406.07ms step:669/3242 train_loss:3.7364 train_time:267596ms step_avg:406.06ms step:670/3242 train_loss:3.6642 train_time:267995ms step_avg:406.05ms step:671/3242 train_loss:3.8577 train_time:268395ms step_avg:406.04ms step:672/3242 train_loss:3.7417 train_time:268794ms step_avg:406.03ms step:673/3242 train_loss:3.5541 train_time:269193ms step_avg:406.02ms step:674/3242 train_loss:3.9396 train_time:269593ms step_avg:406.01ms step:675/3242 train_loss:3.8314 train_time:269992ms step_avg:406.00ms step:676/3242 
train_loss:3.7956 train_time:270391ms step_avg:405.99ms step:677/3242 train_loss:3.6694 train_time:270790ms step_avg:405.98ms step:678/3242 train_loss:3.6693 train_time:271189ms step_avg:405.97ms step:679/3242 train_loss:3.5774 train_time:271588ms step_avg:405.96ms step:680/3242 train_loss:3.7045 train_time:271987ms step_avg:405.95ms step:681/3242 train_loss:3.7611 train_time:272387ms step_avg:405.94ms step:682/3242 train_loss:3.9032 train_time:272787ms step_avg:405.93ms step:683/3242 train_loss:3.6672 train_time:273187ms step_avg:405.92ms step:684/3242 train_loss:3.6159 train_time:273586ms step_avg:405.91ms step:685/3242 train_loss:3.9269 train_time:273985ms step_avg:405.90ms step:686/3242 train_loss:3.5904 train_time:274385ms step_avg:405.89ms step:687/3242 train_loss:3.7520 train_time:274784ms step_avg:405.88ms step:688/3242 train_loss:3.8134 train_time:275183ms step_avg:405.87ms step:689/3242 train_loss:3.6283 train_time:275582ms step_avg:405.86ms step:690/3242 train_loss:3.6991 train_time:275981ms step_avg:405.85ms step:691/3242 train_loss:3.7180 train_time:276380ms step_avg:405.84ms step:692/3242 train_loss:3.7595 train_time:276780ms step_avg:405.84ms step:693/3242 train_loss:3.8244 train_time:277179ms step_avg:405.83ms step:694/3242 train_loss:3.4446 train_time:277579ms step_avg:405.82ms step:695/3242 train_loss:3.7355 train_time:277978ms step_avg:405.81ms step:696/3242 train_loss:3.7481 train_time:278377ms step_avg:405.80ms step:697/3242 train_loss:3.5203 train_time:278775ms step_avg:405.79ms step:698/3242 train_loss:3.8497 train_time:279173ms step_avg:405.78ms step:699/3242 train_loss:3.7193 train_time:279572ms step_avg:405.76ms step:700/3242 train_loss:3.7106 train_time:279971ms step_avg:405.75ms step:701/3242 train_loss:3.6511 train_time:280370ms step_avg:405.74ms step:702/3242 train_loss:3.7205 train_time:280771ms step_avg:405.74ms step:703/3242 train_loss:2.8503 train_time:281170ms step_avg:405.73ms step:704/3242 train_loss:3.7341 train_time:281569ms 
step_avg:405.72ms step:705/3242 train_loss:3.6169 train_time:281968ms step_avg:405.71ms step:706/3242 train_loss:3.8286 train_time:282366ms step_avg:405.70ms step:707/3242 train_loss:3.6986 train_time:282765ms step_avg:405.69ms step:708/3242 train_loss:3.6042 train_time:283163ms step_avg:405.68ms step:709/3242 train_loss:3.6393 train_time:283561ms step_avg:405.67ms step:710/3242 train_loss:3.6456 train_time:283961ms step_avg:405.66ms step:711/3242 train_loss:3.9365 train_time:284359ms step_avg:405.65ms step:712/3242 train_loss:3.6538 train_time:284759ms step_avg:405.64ms step:713/3242 train_loss:3.6968 train_time:285159ms step_avg:405.63ms step:714/3242 train_loss:3.6848 train_time:285559ms step_avg:405.62ms step:715/3242 train_loss:3.7040 train_time:285958ms step_avg:405.61ms step:716/3242 train_loss:3.5075 train_time:286355ms step_avg:405.60ms step:717/3242 train_loss:4.0020 train_time:286754ms step_avg:405.59ms step:718/3242 train_loss:3.8723 train_time:287151ms step_avg:405.58ms step:719/3242 train_loss:3.6758 train_time:287551ms step_avg:405.57ms step:720/3242 train_loss:3.8020 train_time:287950ms step_avg:405.56ms step:721/3242 train_loss:3.8451 train_time:288348ms step_avg:405.55ms step:722/3242 train_loss:3.5756 train_time:288746ms step_avg:405.54ms step:723/3242 train_loss:3.9327 train_time:289323ms step_avg:405.78ms step:724/3242 train_loss:3.6187 train_time:289721ms step_avg:405.77ms step:725/3242 train_loss:3.6742 train_time:290120ms step_avg:405.76ms step:726/3242 train_loss:3.7364 train_time:290517ms step_avg:405.75ms step:727/3242 train_loss:3.6301 train_time:290915ms step_avg:405.74ms step:728/3242 train_loss:3.7154 train_time:291313ms step_avg:405.73ms step:729/3242 train_loss:3.5903 train_time:291711ms step_avg:405.72ms step:730/3242 train_loss:3.4562 train_time:292112ms step_avg:405.71ms step:731/3242 train_loss:3.7068 train_time:292511ms step_avg:405.70ms step:732/3242 train_loss:3.6101 train_time:292910ms step_avg:405.69ms step:733/3242 
train_loss:3.7840 train_time:293310ms step_avg:405.69ms step:734/3242 train_loss:3.7678 train_time:293708ms step_avg:405.67ms step:735/3242 train_loss:3.5498 train_time:294106ms step_avg:405.66ms step:736/3242 train_loss:3.5991 train_time:294504ms step_avg:405.65ms step:737/3242 train_loss:3.5818 train_time:294902ms step_avg:405.64ms step:738/3242 train_loss:3.9326 train_time:295302ms step_avg:405.64ms step:739/3242 train_loss:3.7069 train_time:295702ms step_avg:405.63ms step:740/3242 train_loss:3.7551 train_time:296101ms step_avg:405.62ms step:741/3242 train_loss:3.8237 train_time:296499ms step_avg:405.61ms step:742/3242 train_loss:3.6808 train_time:296897ms step_avg:405.60ms step:743/3242 train_loss:3.7792 train_time:297297ms step_avg:405.59ms step:744/3242 train_loss:3.5087 train_time:297695ms step_avg:405.58ms step:745/3242 train_loss:3.8691 train_time:298094ms step_avg:405.57ms step:746/3242 train_loss:3.6300 train_time:298494ms step_avg:405.56ms step:747/3242 train_loss:3.6434 train_time:298893ms step_avg:405.55ms step:748/3242 train_loss:3.7607 train_time:299291ms step_avg:405.54ms step:749/3242 train_loss:3.7537 train_time:299690ms step_avg:405.53ms step:750/3242 train_loss:3.6719 train_time:300088ms step_avg:405.52ms step:750/3242 val_loss:3.6804 train_time:300115ms step_avg:405.56ms step:751/3242 train_loss:3.6915 train_time:300487ms step_avg:405.52ms step:752/3242 train_loss:3.8525 train_time:300886ms step_avg:405.51ms step:753/3242 train_loss:3.5975 train_time:301284ms step_avg:405.50ms step:754/3242 train_loss:3.7607 train_time:301683ms step_avg:405.49ms step:755/3242 train_loss:3.6931 train_time:302082ms step_avg:405.48ms step:756/3242 train_loss:3.6701 train_time:302481ms step_avg:405.47ms step:757/3242 train_loss:3.7093 train_time:302880ms step_avg:405.46ms step:758/3242 train_loss:3.7079 train_time:303278ms step_avg:405.45ms step:759/3242 train_loss:3.7267 train_time:303676ms step_avg:405.44ms step:760/3242 train_loss:3.7355 train_time:304265ms 
step_avg:405.69ms step:761/3242 train_loss:3.6964 train_time:304672ms step_avg:405.69ms step:762/3242 train_loss:3.9117 train_time:305261ms step_avg:405.93ms step:763/3242 train_loss:3.5665 train_time:305659ms step_avg:405.92ms step:764/3242 train_loss:3.7428 train_time:306057ms step_avg:405.91ms step:765/3242 train_loss:3.6381 train_time:306454ms step_avg:405.90ms step:766/3242 train_loss:3.4730 train_time:306852ms step_avg:405.89ms step:767/3242 train_loss:3.9385 train_time:307251ms step_avg:405.88ms step:768/3242 train_loss:3.7507 train_time:307649ms step_avg:405.87ms step:769/3242 train_loss:3.7566 train_time:308048ms step_avg:405.86ms step:770/3242 train_loss:3.6455 train_time:308446ms step_avg:405.85ms step:771/3242 train_loss:3.8693 train_time:308844ms step_avg:405.84ms step:772/3242 train_loss:3.7814 train_time:309243ms step_avg:405.83ms step:773/3242 train_loss:4.0606 train_time:309641ms step_avg:405.82ms step:774/3242 train_loss:3.5674 train_time:310039ms step_avg:405.81ms step:775/3242 train_loss:3.6692 train_time:310437ms step_avg:405.80ms step:776/3242 train_loss:3.8188 train_time:310834ms step_avg:405.79ms step:777/3242 train_loss:3.7862 train_time:311234ms step_avg:405.78ms step:778/3242 train_loss:3.6534 train_time:311633ms step_avg:405.77ms step:779/3242 train_loss:3.4871 train_time:312204ms step_avg:405.99ms step:780/3242 train_loss:3.6901 train_time:312602ms step_avg:405.98ms step:781/3242 train_loss:3.5958 train_time:313001ms step_avg:405.97ms step:782/3242 train_loss:3.7434 train_time:313399ms step_avg:405.96ms step:783/3242 train_loss:3.8012 train_time:313796ms step_avg:405.95ms step:784/3242 train_loss:3.6548 train_time:314195ms step_avg:405.94ms step:785/3242 train_loss:3.7031 train_time:314593ms step_avg:405.93ms step:786/3242 train_loss:3.6757 train_time:314994ms step_avg:405.92ms step:787/3242 train_loss:3.6411 train_time:315392ms step_avg:405.91ms step:788/3242 train_loss:3.5206 train_time:315791ms step_avg:405.90ms step:789/3242 
train_loss:4.0737 train_time:316334ms step_avg:406.08ms step:790/3242 train_loss:3.5339 train_time:316733ms step_avg:406.07ms step:791/3242 train_loss:3.5980 train_time:317131ms step_avg:406.06ms step:792/3242 train_loss:3.7360 train_time:317530ms step_avg:406.05ms step:793/3242 train_loss:3.6666 train_time:317929ms step_avg:406.04ms step:794/3242 train_loss:3.5878 train_time:318328ms step_avg:406.03ms step:795/3242 train_loss:3.4927 train_time:318725ms step_avg:406.02ms step:796/3242 train_loss:3.6172 train_time:319124ms step_avg:406.01ms step:797/3242 train_loss:3.6962 train_time:319521ms step_avg:406.00ms step:798/3242 train_loss:3.7094 train_time:319919ms step_avg:405.99ms step:799/3242 train_loss:3.5998 train_time:320316ms step_avg:405.98ms step:800/3242 train_loss:3.8270 train_time:320714ms step_avg:405.97ms step:801/3242 train_loss:3.6580 train_time:321275ms step_avg:406.16ms step:802/3242 train_loss:3.6730 train_time:321673ms step_avg:406.15ms step:803/3242 train_loss:3.7665 train_time:322071ms step_avg:406.14ms step:804/3242 train_loss:3.7694 train_time:322470ms step_avg:406.13ms step:805/3242 train_loss:3.8042 train_time:323030ms step_avg:406.33ms step:806/3242 train_loss:3.7611 train_time:323435ms step_avg:406.33ms step:807/3242 train_loss:3.5877 train_time:323833ms step_avg:406.31ms step:808/3242 train_loss:3.4444 train_time:324232ms step_avg:406.31ms step:809/3242 train_loss:3.7484 train_time:324631ms step_avg:406.30ms step:810/3242 train_loss:3.7546 train_time:325030ms step_avg:406.29ms step:811/3242 train_loss:3.6030 train_time:325429ms step_avg:406.28ms step:812/3242 train_loss:3.6512 train_time:325829ms step_avg:406.27ms step:813/3242 train_loss:3.7606 train_time:326226ms step_avg:406.26ms step:814/3242 train_loss:3.7255 train_time:326624ms step_avg:406.25ms step:815/3242 train_loss:3.6922 train_time:327023ms step_avg:406.24ms step:816/3242 train_loss:3.7404 train_time:327423ms step_avg:406.23ms step:817/3242 train_loss:3.6621 train_time:327821ms 
step_avg:406.22ms step:818/3242 train_loss:3.7139 train_time:328219ms step_avg:406.21ms step:819/3242 train_loss:4.0538 train_time:328617ms step_avg:406.20ms step:820/3242 train_loss:3.6351 train_time:329015ms step_avg:406.19ms step:821/3242 train_loss:3.8991 train_time:329414ms step_avg:406.18ms step:822/3242 train_loss:3.5127 train_time:329813ms step_avg:406.17ms step:823/3242 train_loss:3.6213 train_time:330212ms step_avg:406.16ms step:824/3242 train_loss:3.9053 train_time:330617ms step_avg:406.16ms step:825/3242 train_loss:3.6652 train_time:331016ms step_avg:406.15ms step:826/3242 train_loss:3.6382 train_time:331414ms step_avg:406.14ms step:827/3242 train_loss:3.7814 train_time:331812ms step_avg:406.13ms step:828/3242 train_loss:3.5976 train_time:332211ms step_avg:406.13ms step:829/3242 train_loss:4.0753 train_time:332611ms step_avg:406.12ms step:830/3242 train_loss:3.6691 train_time:333011ms step_avg:406.11ms step:831/3242 train_loss:3.7341 train_time:333410ms step_avg:406.10ms step:832/3242 train_loss:3.6345 train_time:333808ms step_avg:406.09ms step:833/3242 train_loss:3.6157 train_time:334206ms step_avg:406.08ms step:834/3242 train_loss:3.5671 train_time:334605ms step_avg:406.07ms step:835/3242 train_loss:3.6656 train_time:335002ms step_avg:406.06ms step:836/3242 train_loss:3.5515 train_time:335401ms step_avg:406.05ms step:837/3242 train_loss:3.5147 train_time:335799ms step_avg:406.04ms step:838/3242 train_loss:3.8494 train_time:336196ms step_avg:406.03ms step:839/3242 train_loss:3.5242 train_time:336594ms step_avg:406.02ms step:840/3242 train_loss:3.6443 train_time:336992ms step_avg:406.01ms step:841/3242 train_loss:3.5346 train_time:337391ms step_avg:406.01ms step:842/3242 train_loss:3.6542 train_time:337788ms step_avg:406.00ms step:843/3242 train_loss:3.6685 train_time:338188ms step_avg:405.99ms step:844/3242 train_loss:3.7323 train_time:338586ms step_avg:405.98ms step:845/3242 train_loss:3.6334 train_time:338990ms step_avg:405.98ms step:846/3242 
train_loss:3.5516 train_time:339545ms step_avg:406.15ms step:847/3242 train_loss:3.8329 train_time:339941ms step_avg:406.14ms step:848/3242 train_loss:3.5627 train_time:340340ms step_avg:406.13ms step:849/3242 train_loss:3.5408 train_time:340737ms step_avg:406.12ms step:850/3242 train_loss:3.6799 train_time:341136ms step_avg:406.11ms step:851/3242 train_loss:3.5936 train_time:341534ms step_avg:406.10ms step:852/3242 train_loss:3.6741 train_time:341932ms step_avg:406.09ms step:853/3242 train_loss:3.7863 train_time:342330ms step_avg:406.08ms step:854/3242 train_loss:3.5794 train_time:342727ms step_avg:406.08ms step:855/3242 train_loss:3.6578 train_time:343127ms step_avg:406.07ms step:856/3242 train_loss:3.8635 train_time:343524ms step_avg:406.06ms step:857/3242 train_loss:3.5779 train_time:343922ms step_avg:406.05ms step:858/3242 train_loss:3.5588 train_time:344319ms step_avg:406.04ms step:859/3242 train_loss:3.7031 train_time:344718ms step_avg:406.03ms step:860/3242 train_loss:3.4648 train_time:345119ms step_avg:406.02ms step:861/3242 train_loss:3.6665 train_time:345517ms step_avg:406.01ms step:862/3242 train_loss:3.5987 train_time:345917ms step_avg:406.01ms step:863/3242 train_loss:3.7698 train_time:346315ms step_avg:406.00ms step:864/3242 train_loss:3.6463 train_time:346714ms step_avg:405.99ms step:865/3242 train_loss:3.6809 train_time:347114ms step_avg:405.98ms step:866/3242 train_loss:3.5583 train_time:347512ms step_avg:405.97ms step:867/3242 train_loss:3.6606 train_time:347911ms step_avg:405.96ms step:868/3242 train_loss:4.1194 train_time:348310ms step_avg:405.96ms step:869/3242 train_loss:3.5217 train_time:348709ms step_avg:405.95ms step:870/3242 train_loss:3.8128 train_time:349107ms step_avg:405.94ms step:871/3242 train_loss:3.6281 train_time:349505ms step_avg:405.93ms step:872/3242 train_loss:3.5913 train_time:349904ms step_avg:405.92ms step:873/3242 train_loss:3.4578 train_time:350301ms step_avg:405.91ms step:874/3242 train_loss:3.8221 train_time:350700ms 
step_avg:405.90ms step:875/3242 train_loss:3.5247 train_time:351098ms step_avg:405.89ms step:875/3242 val_loss:3.6322 train_time:351125ms step_avg:405.92ms step:876/3242 train_loss:3.1895 train_time:351498ms step_avg:405.89ms step:877/3242 train_loss:3.7068 train_time:352056ms step_avg:406.06ms step:878/3242 train_loss:3.7058 train_time:352462ms step_avg:406.06ms step:879/3242 train_loss:3.6945 train_time:352862ms step_avg:406.05ms step:880/3242 train_loss:3.5689 train_time:353259ms step_avg:406.05ms step:881/3242 train_loss:3.6771 train_time:353660ms step_avg:406.04ms step:882/3242 train_loss:3.3337 train_time:354059ms step_avg:406.03ms step:883/3242 train_loss:3.6725 train_time:354456ms step_avg:406.02ms step:884/3242 train_loss:4.1254 train_time:354856ms step_avg:406.01ms step:885/3242 train_loss:3.9791 train_time:355255ms step_avg:406.01ms step:886/3242 train_loss:3.7759 train_time:355653ms step_avg:406.00ms step:887/3242 train_loss:3.6554 train_time:356052ms step_avg:405.99ms step:888/3242 train_loss:3.6760 train_time:356449ms step_avg:405.98ms step:889/3242 train_loss:4.5586 train_time:356851ms step_avg:405.97ms step:890/3242 train_loss:3.9316 train_time:357249ms step_avg:405.96ms step:891/3242 train_loss:3.5672 train_time:357649ms step_avg:405.96ms step:892/3242 train_loss:3.5924 train_time:358049ms step_avg:405.95ms step:893/3242 train_loss:3.5592 train_time:358448ms step_avg:405.94ms step:894/3242 train_loss:3.7453 train_time:358847ms step_avg:405.94ms step:895/3242 train_loss:3.4804 train_time:359246ms step_avg:405.93ms step:896/3242 train_loss:3.5887 train_time:359647ms step_avg:405.92ms step:897/3242 train_loss:3.6162 train_time:360047ms step_avg:405.92ms step:898/3242 train_loss:3.6096 train_time:360446ms step_avg:405.91ms step:899/3242 train_loss:3.6545 train_time:360844ms step_avg:405.90ms step:900/3242 train_loss:3.7822 train_time:361243ms step_avg:405.89ms step:901/3242 train_loss:3.5819 train_time:361642ms step_avg:405.88ms step:902/3242 
train_loss:3.4822 train_time:362041ms step_avg:405.88ms step:903/3242 train_loss:3.6987 train_time:362438ms step_avg:405.87ms step:904/3242 train_loss:3.5711 train_time:362837ms step_avg:405.86ms step:905/3242 train_loss:3.5979 train_time:363235ms step_avg:405.85ms step:906/3242 train_loss:3.6348 train_time:363634ms step_avg:405.84ms step:907/3242 train_loss:3.6337 train_time:364032ms step_avg:405.83ms step:908/3242 train_loss:3.6633 train_time:364431ms step_avg:405.83ms step:909/3242 train_loss:3.6214 train_time:364831ms step_avg:405.82ms step:910/3242 train_loss:3.5691 train_time:365230ms step_avg:405.81ms step:911/3242 train_loss:3.5923 train_time:365629ms step_avg:405.80ms step:912/3242 train_loss:3.5970 train_time:366028ms step_avg:405.80ms step:913/3242 train_loss:3.6567 train_time:366427ms step_avg:405.79ms step:914/3242 train_loss:3.8648 train_time:366825ms step_avg:405.78ms step:915/3242 train_loss:3.4754 train_time:367225ms step_avg:405.77ms step:916/3242 train_loss:3.7311 train_time:367623ms step_avg:405.76ms step:917/3242 train_loss:3.7638 train_time:368023ms step_avg:405.76ms step:918/3242 train_loss:3.6762 train_time:368422ms step_avg:405.75ms step:919/3242 train_loss:3.6748 train_time:368822ms step_avg:405.74ms step:920/3242 train_loss:4.0239 train_time:369221ms step_avg:405.74ms step:921/3242 train_loss:3.4634 train_time:369619ms step_avg:405.73ms step:922/3242 train_loss:3.6160 train_time:370019ms step_avg:405.72ms step:923/3242 train_loss:3.6328 train_time:370417ms step_avg:405.71ms step:924/3242 train_loss:3.6268 train_time:370815ms step_avg:405.71ms step:925/3242 train_loss:3.7121 train_time:371214ms step_avg:405.70ms step:926/3242 train_loss:3.7376 train_time:371613ms step_avg:405.69ms step:927/3242 train_loss:3.7322 train_time:372012ms step_avg:405.68ms step:928/3242 train_loss:3.6992 train_time:372412ms step_avg:405.68ms step:929/3242 train_loss:3.7847 train_time:372811ms step_avg:405.67ms step:930/3242 train_loss:3.8649 train_time:373209ms 
step_avg:405.66ms step:931/3242 train_loss:3.5485 train_time:373607ms step_avg:405.65ms step:932/3242 train_loss:3.4781 train_time:374007ms step_avg:405.65ms step:933/3242 train_loss:3.5936 train_time:374405ms step_avg:405.64ms step:934/3242 train_loss:3.8217 train_time:374805ms step_avg:405.63ms step:935/3242 train_loss:3.4498 train_time:375204ms step_avg:405.63ms step:936/3242 train_loss:3.6824 train_time:375603ms step_avg:405.62ms step:937/3242 train_loss:3.5069 train_time:376002ms step_avg:405.61ms step:938/3242 train_loss:3.6065 train_time:376401ms step_avg:405.60ms step:939/3242 train_loss:3.6713 train_time:376799ms step_avg:405.60ms step:940/3242 train_loss:3.5549 train_time:377198ms step_avg:405.59ms step:941/3242 train_loss:3.8309 train_time:377597ms step_avg:405.58ms step:942/3242 train_loss:3.5675 train_time:377996ms step_avg:405.57ms step:943/3242 train_loss:3.6302 train_time:378395ms step_avg:405.57ms step:944/3242 train_loss:3.3855 train_time:378793ms step_avg:405.56ms step:945/3242 train_loss:3.7335 train_time:379192ms step_avg:405.55ms step:946/3242 train_loss:3.3501 train_time:379591ms step_avg:405.55ms step:947/3242 train_loss:3.4676 train_time:379993ms step_avg:405.54ms step:948/3242 train_loss:3.9976 train_time:380393ms step_avg:405.54ms step:949/3242 train_loss:3.9095 train_time:380793ms step_avg:405.53ms step:950/3242 train_loss:3.6512 train_time:381377ms step_avg:405.72ms step:951/3242 train_loss:3.6467 train_time:381785ms step_avg:405.72ms step:952/3242 train_loss:3.5756 train_time:382185ms step_avg:405.72ms step:953/3242 train_loss:3.7499 train_time:382774ms step_avg:405.91ms step:954/3242 train_loss:3.6834 train_time:383175ms step_avg:405.91ms step:955/3242 train_loss:3.3253 train_time:383574ms step_avg:405.90ms step:956/3242 train_loss:3.3878 train_time:383973ms step_avg:405.89ms step:957/3242 train_loss:3.5382 train_time:384371ms step_avg:405.88ms step:958/3242 train_loss:3.6249 train_time:384770ms step_avg:405.88ms step:959/3242 
train_loss:3.3628 train_time:385168ms step_avg:405.87ms step:960/3242 train_loss:3.6276 train_time:385569ms step_avg:405.86ms step:961/3242 train_loss:3.7923 train_time:385968ms step_avg:405.85ms step:962/3242 train_loss:3.5721 train_time:386366ms step_avg:405.85ms step:963/3242 train_loss:3.4723 train_time:386765ms step_avg:405.84ms step:964/3242 train_loss:3.4531 train_time:387164ms step_avg:405.83ms step:965/3242 train_loss:3.6629 train_time:387563ms step_avg:405.83ms step:966/3242 train_loss:3.6978 train_time:387961ms step_avg:405.82ms step:967/3242 train_loss:3.6697 train_time:388359ms step_avg:405.81ms step:968/3242 train_loss:3.6964 train_time:388757ms step_avg:405.80ms step:969/3242 train_loss:3.5761 train_time:389156ms step_avg:405.79ms step:970/3242 train_loss:3.5562 train_time:389554ms step_avg:405.79ms step:971/3242 train_loss:3.4424 train_time:389953ms step_avg:405.78ms step:972/3242 train_loss:3.5381 train_time:390352ms step_avg:405.77ms step:973/3242 train_loss:3.3008 train_time:390750ms step_avg:405.76ms step:974/3242 train_loss:3.6002 train_time:391149ms step_avg:405.76ms step:975/3242 train_loss:3.3563 train_time:391547ms step_avg:405.75ms step:976/3242 train_loss:3.6625 train_time:391944ms step_avg:405.74ms step:977/3242 train_loss:3.8542 train_time:392344ms step_avg:405.73ms step:978/3242 train_loss:3.7088 train_time:392742ms step_avg:405.72ms step:979/3242 train_loss:3.5419 train_time:393141ms step_avg:405.72ms step:980/3242 train_loss:3.6413 train_time:393539ms step_avg:405.71ms step:981/3242 train_loss:3.5324 train_time:393937ms step_avg:405.70ms step:982/3242 train_loss:3.4887 train_time:394334ms step_avg:405.69ms step:983/3242 train_loss:3.5246 train_time:394733ms step_avg:405.69ms step:984/3242 train_loss:3.6359 train_time:395132ms step_avg:405.68ms step:985/3242 train_loss:3.7225 train_time:395533ms step_avg:405.67ms step:986/3242 train_loss:3.5126 train_time:395932ms step_avg:405.67ms step:987/3242 train_loss:3.5969 train_time:396331ms 
step_avg:405.66ms step:988/3242 train_loss:3.5887 train_time:396730ms step_avg:405.65ms step:989/3242 train_loss:3.6589 train_time:397128ms step_avg:405.65ms step:990/3242 train_loss:3.5533 train_time:397528ms step_avg:405.64ms step:991/3242 train_loss:3.6003 train_time:397929ms step_avg:405.64ms step:992/3242 train_loss:3.5445 train_time:398328ms step_avg:405.63ms step:993/3242 train_loss:3.4230 train_time:398727ms step_avg:405.62ms step:994/3242 train_loss:3.6587 train_time:399126ms step_avg:405.62ms step:995/3242 train_loss:3.4571 train_time:399526ms step_avg:405.61ms step:996/3242 train_loss:3.4964 train_time:399924ms step_avg:405.60ms step:997/3242 train_loss:3.5940 train_time:400322ms step_avg:405.59ms step:998/3242 train_loss:3.6164 train_time:400720ms step_avg:405.59ms step:999/3242 train_loss:3.8140 train_time:401118ms step_avg:405.58ms step:1000/3242 train_loss:3.5986 train_time:401516ms step_avg:405.57ms step:1000/3242 val_loss:3.5898 train_time:401543ms step_avg:405.60ms step:1001/3242 train_loss:3.8082 train_time:401916ms step_avg:405.57ms step:1002/3242 train_loss:3.6915 train_time:402315ms step_avg:405.56ms step:1003/3242 train_loss:3.5760 train_time:402714ms step_avg:405.55ms step:1004/3242 train_loss:3.5945 train_time:403113ms step_avg:405.55ms step:1005/3242 train_loss:3.6369 train_time:403512ms step_avg:405.54ms step:1006/3242 train_loss:3.6028 train_time:403910ms step_avg:405.53ms step:1007/3242 train_loss:3.6106 train_time:404309ms step_avg:405.53ms step:1008/3242 train_loss:3.5220 train_time:404709ms step_avg:405.52ms step:1009/3242 train_loss:3.5407 train_time:405109ms step_avg:405.51ms step:1010/3242 train_loss:3.6504 train_time:405507ms step_avg:405.51ms step:1011/3242 train_loss:4.0254 train_time:405907ms step_avg:405.50ms step:1012/3242 train_loss:3.7381 train_time:406304ms step_avg:405.49ms step:1013/3242 train_loss:3.4950 train_time:406703ms step_avg:405.49ms step:1014/3242 train_loss:3.4402 train_time:407103ms step_avg:405.48ms 
step:1015/3242 train_loss:3.4927 train_time:407502ms step_avg:405.47ms step:1016/3242 train_loss:3.5142 train_time:407900ms step_avg:405.47ms step:1017/3242 train_loss:3.7592 train_time:408298ms step_avg:405.46ms step:1018/3242 train_loss:3.4974 train_time:408696ms step_avg:405.45ms step:1019/3242 train_loss:3.6022 train_time:409096ms step_avg:405.45ms step:1020/3242 train_loss:3.5427 train_time:409495ms step_avg:405.44ms step:1021/3242 train_loss:3.6318 train_time:409894ms step_avg:405.43ms step:1022/3242 train_loss:3.5374 train_time:410293ms step_avg:405.43ms step:1023/3242 train_loss:3.9037 train_time:410692ms step_avg:405.42ms step:1024/3242 train_loss:3.6589 train_time:411091ms step_avg:405.42ms step:1025/3242 train_loss:3.4914 train_time:411490ms step_avg:405.41ms step:1026/3242 train_loss:3.7627 train_time:411889ms step_avg:405.40ms step:1027/3242 train_loss:3.5331 train_time:412288ms step_avg:405.40ms step:1028/3242 train_loss:3.5780 train_time:412686ms step_avg:405.39ms step:1029/3242 train_loss:3.5236 train_time:413085ms step_avg:405.38ms step:1030/3242 train_loss:3.3631 train_time:413484ms step_avg:405.38ms step:1031/3242 train_loss:3.7267 train_time:413885ms step_avg:405.37ms step:1032/3242 train_loss:3.9826 train_time:414284ms step_avg:405.37ms step:1033/3242 train_loss:3.5414 train_time:414684ms step_avg:405.36ms step:1034/3242 train_loss:3.5122 train_time:415083ms step_avg:405.35ms step:1035/3242 train_loss:3.5698 train_time:415482ms step_avg:405.35ms step:1036/3242 train_loss:3.7876 train_time:415881ms step_avg:405.34ms step:1037/3242 train_loss:3.4160 train_time:416280ms step_avg:405.34ms step:1038/3242 train_loss:3.3662 train_time:416679ms step_avg:405.33ms step:1039/3242 train_loss:3.7159 train_time:417077ms step_avg:405.32ms step:1040/3242 train_loss:3.4128 train_time:417477ms step_avg:405.32ms step:1041/3242 train_loss:3.4211 train_time:417877ms step_avg:405.31ms step:1042/3242 train_loss:3.6317 train_time:418275ms step_avg:405.31ms 
step:1043/3242 train_loss:3.3413 train_time:418674ms step_avg:405.30ms step:1044/3242 train_loss:3.4977 train_time:419073ms step_avg:405.29ms step:1045/3242 train_loss:3.6244 train_time:419472ms step_avg:405.29ms step:1046/3242 train_loss:3.8307 train_time:419873ms step_avg:405.28ms step:1047/3242 train_loss:3.4274 train_time:420271ms step_avg:405.28ms step:1048/3242 train_loss:3.8588 train_time:420670ms step_avg:405.27ms step:1049/3242 train_loss:3.4334 train_time:421070ms step_avg:405.26ms step:1050/3242 train_loss:3.4833 train_time:421471ms step_avg:405.26ms step:1051/3242 train_loss:3.4799 train_time:421871ms step_avg:405.26ms step:1052/3242 train_loss:3.4886 train_time:422269ms step_avg:405.25ms step:1053/3242 train_loss:3.4948 train_time:422668ms step_avg:405.24ms step:1054/3242 train_loss:3.5786 train_time:423067ms step_avg:405.24ms step:1055/3242 train_loss:3.5764 train_time:423466ms step_avg:405.23ms step:1056/3242 train_loss:3.5906 train_time:423865ms step_avg:405.22ms step:1057/3242 train_loss:3.5642 train_time:424264ms step_avg:405.22ms step:1058/3242 train_loss:3.5030 train_time:424661ms step_avg:405.21ms step:1059/3242 train_loss:3.6234 train_time:425059ms step_avg:405.20ms step:1060/3242 train_loss:3.6193 train_time:425456ms step_avg:405.20ms step:1061/3242 train_loss:3.6062 train_time:425855ms step_avg:405.19ms step:1062/3242 train_loss:3.7890 train_time:426253ms step_avg:405.18ms step:1063/3242 train_loss:3.4976 train_time:426652ms step_avg:405.18ms step:1064/3242 train_loss:3.4188 train_time:427051ms step_avg:405.17ms step:1065/3242 train_loss:3.4968 train_time:427449ms step_avg:405.16ms step:1066/3242 train_loss:3.6471 train_time:427848ms step_avg:405.16ms step:1067/3242 train_loss:3.5864 train_time:428246ms step_avg:405.15ms step:1068/3242 train_loss:3.5949 train_time:428645ms step_avg:405.15ms step:1069/3242 train_loss:3.4764 train_time:429046ms step_avg:405.14ms step:1070/3242 train_loss:3.5572 train_time:429446ms step_avg:405.14ms 
step:1071/3242 train_loss:3.8244 train_time:429845ms step_avg:405.13ms step:1072/3242 train_loss:3.5485 train_time:430244ms step_avg:405.13ms step:1073/3242 train_loss:3.2942 train_time:430643ms step_avg:405.12ms step:1074/3242 train_loss:3.5596 train_time:431043ms step_avg:405.12ms step:1075/3242 train_loss:3.6499 train_time:431442ms step_avg:405.11ms step:1076/3242 train_loss:3.5994 train_time:431840ms step_avg:405.10ms step:1077/3242 train_loss:3.6752 train_time:432238ms step_avg:405.10ms step:1078/3242 train_loss:3.6422 train_time:432637ms step_avg:405.09ms step:1079/3242 train_loss:3.5373 train_time:433035ms step_avg:405.08ms step:1080/3242 train_loss:3.5512 train_time:433434ms step_avg:405.08ms step:1081/3242 train_loss:3.6215 train_time:433834ms step_avg:405.07ms step:1082/3242 train_loss:3.6525 train_time:434233ms step_avg:405.07ms step:1083/3242 train_loss:3.5250 train_time:434632ms step_avg:405.06ms step:1084/3242 train_loss:3.5538 train_time:435030ms step_avg:405.06ms step:1085/3242 train_loss:3.2799 train_time:435429ms step_avg:405.05ms step:1086/3242 train_loss:3.5327 train_time:435827ms step_avg:405.04ms step:1087/3242 train_loss:3.7265 train_time:436226ms step_avg:405.04ms step:1088/3242 train_loss:3.5337 train_time:436625ms step_avg:405.03ms step:1089/3242 train_loss:3.4814 train_time:437023ms step_avg:405.03ms step:1090/3242 train_loss:3.4846 train_time:437423ms step_avg:405.02ms step:1091/3242 train_loss:3.6186 train_time:437822ms step_avg:405.02ms step:1092/3242 train_loss:3.4765 train_time:438220ms step_avg:405.01ms step:1093/3242 train_loss:3.5957 train_time:438620ms step_avg:405.00ms step:1094/3242 train_loss:3.4473 train_time:439018ms step_avg:405.00ms step:1095/3242 train_loss:3.4041 train_time:439415ms step_avg:404.99ms step:1096/3242 train_loss:3.7644 train_time:439814ms step_avg:404.99ms step:1097/3242 train_loss:3.5242 train_time:440212ms step_avg:404.98ms step:1098/3242 train_loss:3.7000 train_time:440609ms step_avg:404.97ms 
step:1099/3242 train_loss:3.5427 train_time:441007ms step_avg:404.97ms step:1100/3242 train_loss:3.6730 train_time:441406ms step_avg:404.96ms step:1101/3242 train_loss:3.6690 train_time:441804ms step_avg:404.95ms step:1102/3242 train_loss:3.5424 train_time:442202ms step_avg:404.95ms step:1103/3242 train_loss:3.6541 train_time:442602ms step_avg:404.94ms step:1104/3242 train_loss:3.5660 train_time:443000ms step_avg:404.94ms step:1105/3242 train_loss:3.4877 train_time:443399ms step_avg:404.93ms step:1106/3242 train_loss:3.6864 train_time:443798ms step_avg:404.93ms step:1107/3242 train_loss:3.6499 train_time:444199ms step_avg:404.92ms step:1108/3242 train_loss:3.5131 train_time:444596ms step_avg:404.91ms step:1109/3242 train_loss:3.8127 train_time:444996ms step_avg:404.91ms step:1110/3242 train_loss:3.3848 train_time:445395ms step_avg:404.90ms step:1111/3242 train_loss:3.6271 train_time:445795ms step_avg:404.90ms step:1112/3242 train_loss:3.7282 train_time:446194ms step_avg:404.89ms step:1113/3242 train_loss:3.4423 train_time:446593ms step_avg:404.89ms step:1114/3242 train_loss:3.7430 train_time:446993ms step_avg:404.89ms step:1115/3242 train_loss:3.4098 train_time:447393ms step_avg:404.88ms step:1116/3242 train_loss:3.6215 train_time:447792ms step_avg:404.88ms step:1117/3242 train_loss:3.7371 train_time:448191ms step_avg:404.87ms step:1118/3242 train_loss:3.3790 train_time:448589ms step_avg:404.86ms step:1119/3242 train_loss:3.9086 train_time:448988ms step_avg:404.86ms step:1120/3242 train_loss:3.4496 train_time:449386ms step_avg:404.85ms step:1121/3242 train_loss:3.3529 train_time:449784ms step_avg:404.85ms step:1122/3242 train_loss:3.5042 train_time:450182ms step_avg:404.84ms step:1123/3242 train_loss:3.5708 train_time:450580ms step_avg:404.83ms step:1124/3242 train_loss:3.5761 train_time:450979ms step_avg:404.83ms step:1125/3242 train_loss:3.5523 train_time:451377ms step_avg:404.82ms step:1125/3242 val_loss:3.5594 train_time:451403ms step_avg:404.85ms 
step:1126/3242 train_loss:3.7183 train_time:451776ms step_avg:404.82ms step:1127/3242 train_loss:3.5051 train_time:452175ms step_avg:404.81ms step:1128/3242 train_loss:3.7470 train_time:452575ms step_avg:404.81ms step:1129/3242 train_loss:3.3365 train_time:452976ms step_avg:404.80ms step:1130/3242 train_loss:3.4313 train_time:453375ms step_avg:404.80ms step:1131/3242 train_loss:3.5064 train_time:453775ms step_avg:404.79ms step:1132/3242 train_loss:3.5108 train_time:454174ms step_avg:404.79ms step:1133/3242 train_loss:3.6832 train_time:454574ms step_avg:404.79ms step:1134/3242 train_loss:3.7659 train_time:454974ms step_avg:404.78ms step:1135/3242 train_loss:3.5399 train_time:455375ms step_avg:404.78ms step:1136/3242 train_loss:3.5150 train_time:455774ms step_avg:404.77ms step:1137/3242 train_loss:3.6733 train_time:456173ms step_avg:404.77ms step:1138/3242 train_loss:3.6567 train_time:456573ms step_avg:404.76ms step:1139/3242 train_loss:3.5640 train_time:456973ms step_avg:404.76ms step:1140/3242 train_loss:3.7284 train_time:457560ms step_avg:404.92ms step:1141/3242 train_loss:3.6038 train_time:457965ms step_avg:404.92ms step:1142/3242 train_loss:3.5405 train_time:458366ms step_avg:404.92ms step:1143/3242 train_loss:3.5428 train_time:458956ms step_avg:405.08ms step:1144/3242 train_loss:3.7917 train_time:459355ms step_avg:405.07ms step:1145/3242 train_loss:3.6649 train_time:459751ms step_avg:405.07ms step:1146/3242 train_loss:3.5848 train_time:460146ms step_avg:405.06ms step:1147/3242 train_loss:3.5326 train_time:460542ms step_avg:405.05ms step:1148/3242 train_loss:3.5877 train_time:460937ms step_avg:405.04ms step:1149/3242 train_loss:3.8813 train_time:461331ms step_avg:405.03ms step:1150/3242 train_loss:3.7183 train_time:461726ms step_avg:405.02ms step:1151/3242 train_loss:3.5526 train_time:462119ms step_avg:405.01ms step:1152/3242 train_loss:3.4102 train_time:462514ms step_avg:405.00ms step:1153/3242 train_loss:3.4613 train_time:462910ms step_avg:405.00ms 
step:1154/3242 train_loss:3.5080 train_time:463304ms step_avg:404.99ms step:1155/3242 train_loss:3.9207 train_time:463699ms step_avg:404.98ms step:1156/3242 train_loss:3.7130 train_time:464094ms step_avg:404.97ms step:1157/3242 train_loss:3.7221 train_time:464489ms step_avg:404.96ms step:1158/3242 train_loss:3.3783 train_time:464883ms step_avg:404.95ms step:1159/3242 train_loss:3.7728 train_time:465277ms step_avg:404.94ms step:1160/3242 train_loss:3.6281 train_time:465671ms step_avg:404.93ms step:1161/3242 train_loss:3.4028 train_time:466065ms step_avg:404.92ms step:1162/3242 train_loss:3.5058 train_time:466460ms step_avg:404.91ms step:1163/3242 train_loss:3.4144 train_time:466854ms step_avg:404.90ms step:1164/3242 train_loss:3.3967 train_time:467248ms step_avg:404.89ms step:1165/3242 train_loss:3.4476 train_time:467642ms step_avg:404.88ms step:1166/3242 train_loss:3.6598 train_time:468036ms step_avg:404.88ms step:1167/3242 train_loss:3.5444 train_time:468431ms step_avg:404.87ms step:1168/3242 train_loss:3.4111 train_time:468825ms step_avg:404.86ms step:1169/3242 train_loss:3.5258 train_time:469220ms step_avg:404.85ms step:1170/3242 train_loss:3.5318 train_time:469616ms step_avg:404.84ms step:1171/3242 train_loss:3.7342 train_time:470011ms step_avg:404.83ms step:1172/3242 train_loss:3.4696 train_time:470407ms step_avg:404.83ms step:1173/3242 train_loss:3.5536 train_time:470802ms step_avg:404.82ms step:1174/3242 train_loss:3.5251 train_time:471197ms step_avg:404.81ms step:1175/3242 train_loss:3.5511 train_time:471592ms step_avg:404.80ms step:1176/3242 train_loss:3.7286 train_time:471988ms step_avg:404.79ms step:1177/3242 train_loss:3.4505 train_time:472383ms step_avg:404.78ms step:1178/3242 train_loss:3.5642 train_time:472779ms step_avg:404.78ms step:1179/3242 train_loss:3.6932 train_time:473174ms step_avg:404.77ms step:1180/3242 train_loss:3.4783 train_time:473569ms step_avg:404.76ms step:1181/3242 train_loss:3.6435 train_time:473965ms step_avg:404.75ms 
step:1182/3242 train_loss:3.4859 train_time:474360ms step_avg:404.74ms step:1183/3242 train_loss:3.4458 train_time:474755ms step_avg:404.74ms step:1184/3242 train_loss:3.3009 train_time:475150ms step_avg:404.73ms step:1185/3242 train_loss:3.5994 train_time:475544ms step_avg:404.72ms step:1186/3242 train_loss:3.3973 train_time:475937ms step_avg:404.71ms step:1187/3242 train_loss:3.7936 train_time:476331ms step_avg:404.70ms step:1188/3242 train_loss:3.7654 train_time:476724ms step_avg:404.69ms step:1189/3242 train_loss:3.5137 train_time:477117ms step_avg:404.68ms step:1190/3242 train_loss:3.5289 train_time:477513ms step_avg:404.67ms step:1191/3242 train_loss:3.5581 train_time:477908ms step_avg:404.66ms step:1192/3242 train_loss:3.3643 train_time:478303ms step_avg:404.66ms step:1193/3242 train_loss:3.6105 train_time:478699ms step_avg:404.65ms step:1194/3242 train_loss:3.9917 train_time:479094ms step_avg:404.64ms step:1195/3242 train_loss:3.5258 train_time:479490ms step_avg:404.63ms step:1196/3242 train_loss:3.4533 train_time:479887ms step_avg:404.63ms step:1197/3242 train_loss:3.7190 train_time:480283ms step_avg:404.62ms step:1198/3242 train_loss:3.5134 train_time:480679ms step_avg:404.61ms step:1199/3242 train_loss:3.4406 train_time:481075ms step_avg:404.60ms step:1200/3242 train_loss:3.4205 train_time:481472ms step_avg:404.60ms step:1201/3242 train_loss:3.4079 train_time:481867ms step_avg:404.59ms step:1202/3242 train_loss:3.5432 train_time:482262ms step_avg:404.58ms step:1203/3242 train_loss:3.6114 train_time:482657ms step_avg:404.57ms step:1204/3242 train_loss:3.7578 train_time:483052ms step_avg:404.57ms step:1205/3242 train_loss:3.7134 train_time:483446ms step_avg:404.56ms step:1206/3242 train_loss:3.5779 train_time:483840ms step_avg:404.55ms step:1207/3242 train_loss:3.4868 train_time:484235ms step_avg:404.54ms step:1208/3242 train_loss:3.5388 train_time:484629ms step_avg:404.53ms step:1209/3242 train_loss:3.6545 train_time:485023ms step_avg:404.52ms 
step:1210/3242 train_loss:3.7128 train_time:485419ms step_avg:404.52ms step:1211/3242 train_loss:3.4709 train_time:485815ms step_avg:404.51ms step:1212/3242 train_loss:3.3707 train_time:486211ms step_avg:404.50ms step:1213/3242 train_loss:3.3096 train_time:486607ms step_avg:404.49ms step:1214/3242 train_loss:3.6080 train_time:487002ms step_avg:404.49ms step:1215/3242 train_loss:3.6780 train_time:487398ms step_avg:404.48ms step:1216/3242 train_loss:3.5250 train_time:487795ms step_avg:404.47ms step:1217/3242 train_loss:3.4232 train_time:488190ms step_avg:404.47ms step:1218/3242 train_loss:3.7365 train_time:488585ms step_avg:404.46ms step:1219/3242 train_loss:3.4684 train_time:488982ms step_avg:404.45ms step:1220/3242 train_loss:3.5320 train_time:489378ms step_avg:404.44ms step:1221/3242 train_loss:3.5691 train_time:489773ms step_avg:404.44ms step:1222/3242 train_loss:3.5896 train_time:490167ms step_avg:404.43ms step:1223/3242 train_loss:3.5453 train_time:490562ms step_avg:404.42ms step:1224/3242 train_loss:3.7505 train_time:490956ms step_avg:404.41ms step:1225/3242 train_loss:3.5114 train_time:491350ms step_avg:404.40ms step:1226/3242 train_loss:3.4395 train_time:491745ms step_avg:404.40ms step:1227/3242 train_loss:3.3449 train_time:492139ms step_avg:404.39ms step:1228/3242 train_loss:3.4597 train_time:492534ms step_avg:404.38ms step:1229/3242 train_loss:3.4623 train_time:492929ms step_avg:404.37ms step:1230/3242 train_loss:3.4985 train_time:493323ms step_avg:404.36ms step:1231/3242 train_loss:3.5709 train_time:493718ms step_avg:404.36ms step:1232/3242 train_loss:3.4937 train_time:494112ms step_avg:404.35ms step:1233/3242 train_loss:3.7622 train_time:494507ms step_avg:404.34ms step:1234/3242 train_loss:3.8123 train_time:494901ms step_avg:404.33ms step:1235/3242 train_loss:3.7103 train_time:495296ms step_avg:404.32ms step:1236/3242 train_loss:3.6255 train_time:495691ms step_avg:404.32ms step:1237/3242 train_loss:3.7962 train_time:496086ms step_avg:404.31ms 
step:1238/3242 train_loss:3.5068 train_time:496481ms step_avg:404.30ms step:1239/3242 train_loss:3.4475 train_time:496877ms step_avg:404.29ms step:1240/3242 train_loss:3.3802 train_time:497272ms step_avg:404.29ms step:1241/3242 train_loss:3.4100 train_time:497668ms step_avg:404.28ms step:1242/3242 train_loss:3.4942 train_time:498063ms step_avg:404.27ms step:1243/3242 train_loss:3.4048 train_time:498459ms step_avg:404.27ms step:1244/3242 train_loss:3.6035 train_time:498854ms step_avg:404.26ms step:1245/3242 train_loss:3.4854 train_time:499249ms step_avg:404.25ms step:1246/3242 train_loss:3.5751 train_time:499643ms step_avg:404.24ms step:1247/3242 train_loss:3.6365 train_time:500038ms step_avg:404.23ms step:1248/3242 train_loss:3.3965 train_time:500434ms step_avg:404.23ms step:1249/3242 train_loss:3.4736 train_time:500829ms step_avg:404.22ms step:1250/3242 train_loss:3.5940 train_time:501224ms step_avg:404.21ms step:1250/3242 val_loss:3.5301 train_time:501251ms step_avg:404.23ms step:1251/3242 train_loss:3.7045 train_time:501621ms step_avg:404.21ms step:1252/3242 train_loss:3.6003 train_time:502015ms step_avg:404.20ms step:1253/3242 train_loss:3.4590 train_time:502410ms step_avg:404.19ms step:1254/3242 train_loss:3.5517 train_time:502805ms step_avg:404.18ms step:1255/3242 train_loss:3.5417 train_time:503200ms step_avg:404.18ms step:1256/3242 train_loss:3.5438 train_time:503595ms step_avg:404.17ms step:1257/3242 train_loss:3.7032 train_time:503990ms step_avg:404.16ms step:1258/3242 train_loss:3.6193 train_time:504385ms step_avg:404.15ms step:1259/3242 train_loss:3.6538 train_time:504780ms step_avg:404.15ms step:1260/3242 train_loss:3.6226 train_time:505175ms step_avg:404.14ms step:1261/3242 train_loss:3.5325 train_time:505570ms step_avg:404.13ms step:1262/3242 train_loss:3.5180 train_time:505966ms step_avg:404.13ms step:1263/3242 train_loss:3.3775 train_time:506361ms step_avg:404.12ms step:1264/3242 train_loss:3.3005 train_time:506755ms step_avg:404.11ms 
step:1265/3242 train_loss:3.6574 train_time:507150ms step_avg:404.10ms step:1266/3242 train_loss:3.3508 train_time:507545ms step_avg:404.10ms step:1267/3242 train_loss:3.6902 train_time:507940ms step_avg:404.09ms step:1268/3242 train_loss:3.5925 train_time:508334ms step_avg:404.08ms step:1269/3242 train_loss:3.5811 train_time:508727ms step_avg:404.07ms step:1270/3242 train_loss:3.6635 train_time:509123ms step_avg:404.07ms step:1271/3242 train_loss:3.4826 train_time:509516ms step_avg:404.06ms step:1272/3242 train_loss:3.4931 train_time:509910ms step_avg:404.05ms step:1273/3242 train_loss:3.4383 train_time:510305ms step_avg:404.04ms step:1274/3242 train_loss:3.4266 train_time:510700ms step_avg:404.03ms step:1275/3242 train_loss:3.4704 train_time:511095ms step_avg:404.03ms step:1276/3242 train_loss:3.5173 train_time:511491ms step_avg:404.02ms step:1277/3242 train_loss:3.5095 train_time:511887ms step_avg:404.01ms step:1278/3242 train_loss:3.6412 train_time:512280ms step_avg:404.01ms step:1279/3242 train_loss:3.6411 train_time:512675ms step_avg:404.00ms step:1280/3242 train_loss:3.5917 train_time:513070ms step_avg:403.99ms step:1281/3242 train_loss:3.4940 train_time:513465ms step_avg:403.99ms step:1282/3242 train_loss:3.3318 train_time:513861ms step_avg:403.98ms step:1283/3242 train_loss:3.4296 train_time:514256ms step_avg:403.97ms step:1284/3242 train_loss:3.6394 train_time:514652ms step_avg:403.97ms step:1285/3242 train_loss:3.5557 train_time:515049ms step_avg:403.96ms step:1286/3242 train_loss:3.6316 train_time:515445ms step_avg:403.95ms step:1287/3242 train_loss:3.5765 train_time:515840ms step_avg:403.95ms step:1288/3242 train_loss:3.5428 train_time:516235ms step_avg:403.94ms step:1289/3242 train_loss:3.4017 train_time:516630ms step_avg:403.93ms step:1290/3242 train_loss:3.4672 train_time:517025ms step_avg:403.93ms step:1291/3242 train_loss:3.4431 train_time:517420ms step_avg:403.92ms step:1292/3242 train_loss:3.4097 train_time:517815ms step_avg:403.91ms 
step:1293/3242 train_loss:3.5536 train_time:518210ms step_avg:403.91ms step:1294/3242 train_loss:3.5804 train_time:518606ms step_avg:403.90ms step:1295/3242 train_loss:3.6220 train_time:519001ms step_avg:403.89ms step:1296/3242 train_loss:3.6466 train_time:519396ms step_avg:403.88ms step:1297/3242 train_loss:3.3016 train_time:519790ms step_avg:403.88ms step:1298/3242 train_loss:3.4185 train_time:520186ms step_avg:403.87ms step:1299/3242 train_loss:3.6078 train_time:520581ms step_avg:403.86ms step:1300/3242 train_loss:3.3677 train_time:520977ms step_avg:403.86ms step:1301/3242 train_loss:3.6122 train_time:521372ms step_avg:403.85ms step:1302/3242 train_loss:3.5410 train_time:521768ms step_avg:403.85ms step:1303/3242 train_loss:3.6596 train_time:522165ms step_avg:403.84ms step:1304/3242 train_loss:3.6266 train_time:522561ms step_avg:403.83ms step:1305/3242 train_loss:3.7717 train_time:522957ms step_avg:403.83ms step:1306/3242 train_loss:3.3864 train_time:523353ms step_avg:403.82ms step:1307/3242 train_loss:3.8794 train_time:523748ms step_avg:403.82ms step:1308/3242 train_loss:3.4868 train_time:524145ms step_avg:403.81ms step:1309/3242 train_loss:3.6923 train_time:524540ms step_avg:403.80ms step:1310/3242 train_loss:3.8430 train_time:524935ms step_avg:403.80ms step:1311/3242 train_loss:3.4590 train_time:525329ms step_avg:403.79ms step:1312/3242 train_loss:3.4288 train_time:525724ms step_avg:403.78ms step:1313/3242 train_loss:3.6012 train_time:526118ms step_avg:403.77ms step:1314/3242 train_loss:3.5264 train_time:526512ms step_avg:403.77ms step:1315/3242 train_loss:3.5050 train_time:526907ms step_avg:403.76ms step:1316/3242 train_loss:3.5917 train_time:527303ms step_avg:403.75ms step:1317/3242 train_loss:3.2377 train_time:527697ms step_avg:403.75ms step:1318/3242 train_loss:3.4251 train_time:528092ms step_avg:403.74ms step:1319/3242 train_loss:3.5330 train_time:528487ms step_avg:403.73ms step:1320/3242 train_loss:3.6491 train_time:528883ms step_avg:403.73ms 
step:1321/3242 train_loss:3.3514 train_time:529277ms step_avg:403.72ms step:1322/3242 train_loss:3.5219 train_time:529672ms step_avg:403.71ms step:1323/3242 train_loss:3.6624 train_time:530067ms step_avg:403.71ms step:1324/3242 train_loss:3.6273 train_time:530464ms step_avg:403.70ms step:1325/3242 train_loss:3.5108 train_time:530859ms step_avg:403.70ms step:1326/3242 train_loss:3.5403 train_time:531254ms step_avg:403.69ms step:1327/3242 train_loss:3.8023 train_time:531649ms step_avg:403.68ms step:1328/3242 train_loss:3.8362 train_time:532043ms step_avg:403.67ms step:1329/3242 train_loss:3.5716 train_time:532437ms step_avg:403.67ms step:1330/3242 train_loss:3.3951 train_time:533020ms step_avg:403.80ms step:1331/3242 train_loss:3.7991 train_time:533422ms step_avg:403.80ms step:1332/3242 train_loss:3.5675 train_time:533817ms step_avg:403.80ms step:1333/3242 train_loss:3.7716 train_time:534212ms step_avg:403.79ms step:1334/3242 train_loss:3.4376 train_time:534797ms step_avg:403.93ms step:1335/3242 train_loss:3.4889 train_time:535192ms step_avg:403.92ms step:1336/3242 train_loss:3.4916 train_time:535587ms step_avg:403.91ms step:1337/3242 train_loss:3.5987 train_time:535982ms step_avg:403.91ms step:1338/3242 train_loss:3.4453 train_time:536377ms step_avg:403.90ms step:1339/3242 train_loss:3.4284 train_time:536772ms step_avg:403.89ms step:1340/3242 train_loss:3.4464 train_time:537168ms step_avg:403.89ms step:1341/3242 train_loss:3.5184 train_time:537565ms step_avg:403.88ms step:1342/3242 train_loss:3.4037 train_time:537960ms step_avg:403.87ms step:1343/3242 train_loss:3.5423 train_time:538355ms step_avg:403.87ms step:1344/3242 train_loss:3.4761 train_time:538750ms step_avg:403.86ms step:1345/3242 train_loss:3.4705 train_time:539303ms step_avg:403.97ms step:1346/3242 train_loss:3.6308 train_time:539707ms step_avg:403.97ms step:1347/3242 train_loss:3.5442 train_time:540101ms step_avg:403.96ms step:1348/3242 train_loss:3.4643 train_time:540495ms step_avg:403.96ms 
step:1349/3242 train_loss:3.4220 train_time:540890ms step_avg:403.95ms step:1350/3242 train_loss:3.4693 train_time:541285ms step_avg:403.94ms step:1351/3242 train_loss:3.9232 train_time:541679ms step_avg:403.94ms step:1352/3242 train_loss:3.4910 train_time:542074ms step_avg:403.93ms step:1353/3242 train_loss:3.5934 train_time:542469ms step_avg:403.92ms step:1354/3242 train_loss:3.7726 train_time:542864ms step_avg:403.92ms step:1355/3242 train_loss:3.4116 train_time:543259ms step_avg:403.91ms step:1356/3242 train_loss:3.4772 train_time:543654ms step_avg:403.90ms step:1357/3242 train_loss:3.4002 train_time:544049ms step_avg:403.90ms step:1358/3242 train_loss:3.5859 train_time:544445ms step_avg:403.89ms step:1359/3242 train_loss:3.4916 train_time:544840ms step_avg:403.88ms step:1360/3242 train_loss:3.7719 train_time:545235ms step_avg:403.88ms step:1361/3242 train_loss:3.8693 train_time:545630ms step_avg:403.87ms step:1362/3242 train_loss:3.8079 train_time:546025ms step_avg:403.86ms step:1363/3242 train_loss:3.3734 train_time:546419ms step_avg:403.86ms step:1364/3242 train_loss:3.6019 train_time:546813ms step_avg:403.85ms step:1365/3242 train_loss:3.5084 train_time:547208ms step_avg:403.84ms step:1366/3242 train_loss:3.3519 train_time:547604ms step_avg:403.84ms step:1367/3242 train_loss:3.3560 train_time:547999ms step_avg:403.83ms step:1368/3242 train_loss:3.3401 train_time:548394ms step_avg:403.82ms step:1369/3242 train_loss:3.5378 train_time:548789ms step_avg:403.82ms step:1370/3242 train_loss:3.5294 train_time:549185ms step_avg:403.81ms step:1371/3242 train_loss:3.5370 train_time:549579ms step_avg:403.81ms step:1372/3242 train_loss:3.7185 train_time:549974ms step_avg:403.80ms step:1373/3242 train_loss:3.5782 train_time:550369ms step_avg:403.79ms step:1374/3242 train_loss:3.3790 train_time:550765ms step_avg:403.79ms step:1375/3242 train_loss:3.5310 train_time:551161ms step_avg:403.78ms step:1375/3242 val_loss:3.5085 train_time:551187ms step_avg:403.80ms 
step:1376/3242 train_loss:3.4564 train_time:551557ms step_avg:403.78ms step:1377/3242 train_loss:3.5522 train_time:551951ms step_avg:403.77ms step:1378/3242 train_loss:3.7297 train_time:552345ms step_avg:403.76ms step:1379/3242 train_loss:3.4612 train_time:552740ms step_avg:403.75ms step:1380/3242 train_loss:3.4868 train_time:553134ms step_avg:403.75ms step:1381/3242 train_loss:3.4371 train_time:553528ms step_avg:403.74ms step:1382/3242 train_loss:3.5103 train_time:553922ms step_avg:403.73ms step:1383/3242 train_loss:3.6184 train_time:554317ms step_avg:403.73ms step:1384/3242 train_loss:3.5023 train_time:554712ms step_avg:403.72ms step:1385/3242 train_loss:3.3630 train_time:555108ms step_avg:403.71ms step:1386/3242 train_loss:3.4957 train_time:555503ms step_avg:403.71ms step:1387/3242 train_loss:3.4160 train_time:555899ms step_avg:403.70ms step:1388/3242 train_loss:3.5437 train_time:556294ms step_avg:403.70ms step:1389/3242 train_loss:3.2354 train_time:556688ms step_avg:403.69ms step:1390/3242 train_loss:3.4281 train_time:557083ms step_avg:403.68ms step:1391/3242 train_loss:3.5787 train_time:557478ms step_avg:403.68ms step:1392/3242 train_loss:3.6133 train_time:557873ms step_avg:403.67ms step:1393/3242 train_loss:3.8191 train_time:558267ms step_avg:403.66ms step:1394/3242 train_loss:3.5702 train_time:558661ms step_avg:403.66ms step:1395/3242 train_loss:3.6754 train_time:559056ms step_avg:403.65ms step:1396/3242 train_loss:3.5435 train_time:559450ms step_avg:403.64ms step:1397/3242 train_loss:3.3823 train_time:559844ms step_avg:403.64ms step:1398/3242 train_loss:3.6552 train_time:560240ms step_avg:403.63ms step:1399/3242 train_loss:3.3839 train_time:560634ms step_avg:403.62ms step:1400/3242 train_loss:3.4250 train_time:561028ms step_avg:403.62ms step:1401/3242 train_loss:3.4257 train_time:561589ms step_avg:403.73ms step:1402/3242 train_loss:3.4451 train_time:561991ms step_avg:403.73ms step:1403/3242 train_loss:3.6962 train_time:562386ms step_avg:403.72ms 
step:1404/3242 train_loss:4.1313 train_time:562780ms step_avg:403.72ms step:1405/3242 train_loss:3.5421 train_time:563175ms step_avg:403.71ms step:1406/3242 train_loss:3.5115 train_time:563569ms step_avg:403.70ms step:1407/3242 train_loss:3.5416 train_time:563963ms step_avg:403.70ms step:1408/3242 train_loss:3.3269 train_time:564357ms step_avg:403.69ms step:1409/3242 train_loss:3.3657 train_time:564752ms step_avg:403.68ms step:1410/3242 train_loss:3.5495 train_time:565146ms step_avg:403.68ms step:1411/3242 train_loss:3.6736 train_time:565684ms step_avg:403.77ms step:1412/3242 train_loss:3.9570 train_time:566087ms step_avg:403.77ms step:1413/3242 train_loss:3.7036 train_time:566481ms step_avg:403.76ms step:1414/3242 train_loss:3.6864 train_time:566876ms step_avg:403.76ms step:1415/3242 train_loss:3.5584 train_time:567271ms step_avg:403.75ms step:1416/3242 train_loss:3.4983 train_time:567665ms step_avg:403.74ms step:1417/3242 train_loss:3.4993 train_time:568060ms step_avg:403.74ms step:1418/3242 train_loss:3.1262 train_time:568453ms step_avg:403.73ms step:1419/3242 train_loss:3.0195 train_time:568848ms step_avg:403.72ms step:1420/3242 train_loss:3.6457 train_time:569242ms step_avg:403.72ms step:1421/3242 train_loss:3.6697 train_time:569637ms step_avg:403.71ms step:1422/3242 train_loss:3.4771 train_time:570031ms step_avg:403.70ms step:1423/3242 train_loss:3.3360 train_time:570580ms step_avg:403.81ms step:1424/3242 train_loss:3.5972 train_time:570982ms step_avg:403.81ms step:1425/3242 train_loss:3.2966 train_time:571377ms step_avg:403.80ms step:1426/3242 train_loss:3.4053 train_time:571771ms step_avg:403.79ms step:1427/3242 train_loss:3.7989 train_time:572165ms step_avg:403.79ms step:1428/3242 train_loss:3.4373 train_time:572726ms step_avg:403.90ms step:1429/3242 train_loss:3.5317 train_time:573119ms step_avg:403.89ms step:1430/3242 train_loss:3.4659 train_time:573513ms step_avg:403.88ms step:1431/3242 train_loss:3.5067 train_time:573907ms step_avg:403.88ms 
step:1432/3242 train_loss:3.4467 train_time:574301ms step_avg:403.87ms step:1433/3242 train_loss:3.4566 train_time:574695ms step_avg:403.86ms step:1434/3242 train_loss:3.5245 train_time:575088ms step_avg:403.85ms step:1435/3242 train_loss:3.3832 train_time:575482ms step_avg:403.85ms step:1436/3242 train_loss:3.9389 train_time:575876ms step_avg:403.84ms step:1437/3242 train_loss:3.4423 train_time:576270ms step_avg:403.83ms step:1438/3242 train_loss:3.4670 train_time:576665ms step_avg:403.83ms step:1439/3242 train_loss:3.4269 train_time:577060ms step_avg:403.82ms step:1440/3242 train_loss:3.4236 train_time:577454ms step_avg:403.81ms step:1441/3242 train_loss:3.2229 train_time:577848ms step_avg:403.81ms step:1442/3242 train_loss:3.6139 train_time:578243ms step_avg:403.80ms step:1443/3242 train_loss:3.4184 train_time:578638ms step_avg:403.79ms step:1444/3242 train_loss:3.6412 train_time:579033ms step_avg:403.79ms step:1445/3242 train_loss:3.5770 train_time:579427ms step_avg:403.78ms step:1446/3242 train_loss:3.4875 train_time:579821ms step_avg:403.78ms step:1447/3242 train_loss:3.9466 train_time:580215ms step_avg:403.77ms step:1448/3242 train_loss:3.5135 train_time:580609ms step_avg:403.76ms step:1449/3242 train_loss:3.2139 train_time:581003ms step_avg:403.75ms step:1450/3242 train_loss:3.4582 train_time:581399ms step_avg:403.75ms step:1451/3242 train_loss:3.4596 train_time:581794ms step_avg:403.74ms step:1452/3242 train_loss:3.5760 train_time:582188ms step_avg:403.74ms step:1453/3242 train_loss:3.4352 train_time:582583ms step_avg:403.73ms step:1454/3242 train_loss:3.2572 train_time:582978ms step_avg:403.72ms step:1455/3242 train_loss:3.4651 train_time:583373ms step_avg:403.72ms step:1456/3242 train_loss:3.4335 train_time:583769ms step_avg:403.71ms step:1457/3242 train_loss:3.3731 train_time:584164ms step_avg:403.71ms step:1458/3242 train_loss:3.5706 train_time:584560ms step_avg:403.70ms step:1459/3242 train_loss:3.5863 train_time:584956ms step_avg:403.70ms 
step:1460/3242 train_loss:3.5026 train_time:585351ms step_avg:403.69ms step:1461/3242 train_loss:3.4517 train_time:585748ms step_avg:403.69ms step:1462/3242 train_loss:3.6191 train_time:586144ms step_avg:403.68ms step:1463/3242 train_loss:3.5070 train_time:586540ms step_avg:403.67ms step:1464/3242 train_loss:3.4260 train_time:586936ms step_avg:403.67ms step:1465/3242 train_loss:3.4165 train_time:587330ms step_avg:403.66ms step:1466/3242 train_loss:3.4550 train_time:587725ms step_avg:403.66ms step:1467/3242 train_loss:3.7135 train_time:588120ms step_avg:403.65ms step:1468/3242 train_loss:3.5704 train_time:588516ms step_avg:403.65ms step:1469/3242 train_loss:3.4678 train_time:589062ms step_avg:403.74ms step:1470/3242 train_loss:3.5381 train_time:589457ms step_avg:403.74ms step:1471/3242 train_loss:3.5478 train_time:589853ms step_avg:403.73ms step:1472/3242 train_loss:3.4349 train_time:590248ms step_avg:403.73ms step:1473/3242 train_loss:3.3370 train_time:590642ms step_avg:403.72ms step:1474/3242 train_loss:3.5759 train_time:591036ms step_avg:403.71ms step:1475/3242 train_loss:3.5265 train_time:591431ms step_avg:403.71ms step:1476/3242 train_loss:3.6460 train_time:591826ms step_avg:403.70ms step:1477/3242 train_loss:3.4482 train_time:592221ms step_avg:403.70ms step:1478/3242 train_loss:3.5093 train_time:592616ms step_avg:403.69ms step:1479/3242 train_loss:3.6255 train_time:593012ms step_avg:403.68ms step:1480/3242 train_loss:3.5138 train_time:593407ms step_avg:403.68ms step:1481/3242 train_loss:3.4557 train_time:593803ms step_avg:403.67ms step:1482/3242 train_loss:3.3654 train_time:594199ms step_avg:403.67ms step:1483/3242 train_loss:3.4621 train_time:594595ms step_avg:403.66ms step:1484/3242 train_loss:3.5503 train_time:594989ms step_avg:403.66ms step:1485/3242 train_loss:3.4594 train_time:595384ms step_avg:403.65ms step:1486/3242 train_loss:3.4138 train_time:595780ms step_avg:403.64ms step:1487/3242 train_loss:3.6579 train_time:596176ms step_avg:403.64ms 
step:1488/3242 train_loss:3.4290 train_time:596573ms step_avg:403.64ms step:1489/3242 train_loss:3.3941 train_time:596967ms step_avg:403.63ms step:1490/3242 train_loss:3.3581 train_time:597363ms step_avg:403.62ms step:1491/3242 train_loss:3.1866 train_time:597759ms step_avg:403.62ms step:1492/3242 train_loss:3.3667 train_time:598155ms step_avg:403.61ms step:1493/3242 train_loss:3.2311 train_time:598552ms step_avg:403.61ms step:1494/3242 train_loss:3.5541 train_time:598948ms step_avg:403.60ms step:1495/3242 train_loss:3.6325 train_time:599344ms step_avg:403.60ms step:1496/3242 train_loss:3.3977 train_time:599739ms step_avg:403.59ms step:1497/3242 train_loss:3.7143 train_time:600135ms step_avg:403.59ms step:1498/3242 train_loss:3.4075 train_time:600531ms step_avg:403.58ms step:1499/3242 train_loss:3.4986 train_time:600927ms step_avg:403.58ms step:1500/3242 train_loss:3.3802 train_time:601491ms step_avg:403.69ms step:1500/3242 val_loss:3.4847 train_time:601517ms step_avg:403.70ms step:1501/3242 train_loss:3.5683 train_time:601886ms step_avg:403.68ms step:1502/3242 train_loss:3.4322 train_time:602281ms step_avg:403.67ms step:1503/3242 train_loss:3.5439 train_time:602675ms step_avg:403.67ms step:1504/3242 train_loss:3.4233 train_time:603069ms step_avg:403.66ms step:1505/3242 train_loss:3.3290 train_time:603464ms step_avg:403.66ms step:1506/3242 train_loss:3.5032 train_time:603859ms step_avg:403.65ms step:1507/3242 train_loss:3.5555 train_time:604254ms step_avg:403.64ms step:1508/3242 train_loss:3.5734 train_time:604648ms step_avg:403.64ms step:1509/3242 train_loss:3.3649 train_time:605042ms step_avg:403.63ms step:1510/3242 train_loss:3.3309 train_time:605437ms step_avg:403.62ms step:1511/3242 train_loss:3.4763 train_time:605830ms step_avg:403.62ms step:1512/3242 train_loss:3.4117 train_time:606224ms step_avg:403.61ms step:1513/3242 train_loss:3.4988 train_time:606619ms step_avg:403.61ms step:1514/3242 train_loss:3.3580 train_time:607015ms step_avg:403.60ms 
step:1515/3242 train_loss:3.4549 train_time:607409ms step_avg:403.59ms step:1516/3242 train_loss:3.5305 train_time:607803ms step_avg:403.59ms step:1517/3242 train_loss:3.4929 train_time:608198ms step_avg:403.58ms step:1518/3242 train_loss:3.4883 train_time:608592ms step_avg:403.58ms step:1519/3242 train_loss:3.4758 train_time:608987ms step_avg:403.57ms step:1520/3242 train_loss:3.4983 train_time:609576ms step_avg:403.69ms step:1521/3242 train_loss:3.4038 train_time:609978ms step_avg:403.69ms step:1522/3242 train_loss:3.6867 train_time:610374ms step_avg:403.69ms step:1523/3242 train_loss:3.5695 train_time:610768ms step_avg:403.68ms step:1524/3242 train_loss:3.3445 train_time:611360ms step_avg:403.80ms step:1525/3242 train_loss:3.4340 train_time:611755ms step_avg:403.80ms step:1526/3242 train_loss:3.5587 train_time:612149ms step_avg:403.79ms step:1527/3242 train_loss:3.4906 train_time:612545ms step_avg:403.79ms step:1528/3242 train_loss:3.6306 train_time:612940ms step_avg:403.78ms step:1529/3242 train_loss:3.4557 train_time:613335ms step_avg:403.78ms step:1530/3242 train_loss:3.5066 train_time:613730ms step_avg:403.77ms step:1531/3242 train_loss:3.4932 train_time:614125ms step_avg:403.76ms step:1532/3242 train_loss:3.8789 train_time:614520ms step_avg:403.76ms step:1533/3242 train_loss:3.6711 train_time:614916ms step_avg:403.75ms step:1534/3242 train_loss:3.3092 train_time:615309ms step_avg:403.75ms step:1535/3242 train_loss:3.4182 train_time:615704ms step_avg:403.74ms step:1536/3242 train_loss:3.5187 train_time:616098ms step_avg:403.73ms step:1537/3242 train_loss:3.4398 train_time:616492ms step_avg:403.73ms step:1538/3242 train_loss:3.5568 train_time:616887ms step_avg:403.72ms step:1539/3242 train_loss:3.4948 train_time:617282ms step_avg:403.72ms step:1540/3242 train_loss:3.3912 train_time:617677ms step_avg:403.71ms step:1541/3242 train_loss:3.4213 train_time:618072ms step_avg:403.70ms step:1542/3242 train_loss:3.8404 train_time:618467ms step_avg:403.70ms 
step:1543/3242 train_loss:3.4454 train_time:618862ms step_avg:403.69ms step:1544/3242 train_loss:3.5426 train_time:619258ms step_avg:403.69ms step:1545/3242 train_loss:3.4477 train_time:619654ms step_avg:403.68ms step:1546/3242 train_loss:3.4148 train_time:620048ms step_avg:403.68ms step:1547/3242 train_loss:3.3549 train_time:620443ms step_avg:403.67ms step:1548/3242 train_loss:3.4859 train_time:620837ms step_avg:403.67ms step:1549/3242 train_loss:3.5861 train_time:621231ms step_avg:403.66ms step:1550/3242 train_loss:3.3711 train_time:621626ms step_avg:403.65ms step:1551/3242 train_loss:3.4625 train_time:622020ms step_avg:403.65ms step:1552/3242 train_loss:3.4211 train_time:622416ms step_avg:403.64ms step:1553/3242 train_loss:3.5437 train_time:622810ms step_avg:403.64ms step:1554/3242 train_loss:3.4089 train_time:623204ms step_avg:403.63ms step:1555/3242 train_loss:3.4258 train_time:623599ms step_avg:403.62ms step:1556/3242 train_loss:3.4979 train_time:623994ms step_avg:403.62ms step:1557/3242 train_loss:3.3964 train_time:624388ms step_avg:403.61ms step:1558/3242 train_loss:3.4705 train_time:624782ms step_avg:403.61ms step:1559/3242 train_loss:3.3955 train_time:625177ms step_avg:403.60ms step:1560/3242 train_loss:3.4149 train_time:625570ms step_avg:403.59ms step:1561/3242 train_loss:3.5037 train_time:625965ms step_avg:403.59ms step:1562/3242 train_loss:3.3399 train_time:626359ms step_avg:403.58ms step:1563/3242 train_loss:3.4248 train_time:626754ms step_avg:403.58ms step:1564/3242 train_loss:3.6421 train_time:627148ms step_avg:403.57ms step:1565/3242 train_loss:3.3510 train_time:627542ms step_avg:403.56ms step:1566/3242 train_loss:3.4567 train_time:627937ms step_avg:403.56ms step:1567/3242 train_loss:3.6769 train_time:628331ms step_avg:403.55ms step:1568/3242 train_loss:3.3347 train_time:628726ms step_avg:403.55ms step:1569/3242 train_loss:3.3302 train_time:629120ms step_avg:403.54ms step:1570/3242 train_loss:3.4456 train_time:629515ms step_avg:403.54ms 
step:1571/3242 train_loss:3.4383 train_time:629909ms step_avg:403.53ms step:1572/3242 train_loss:3.3407 train_time:630304ms step_avg:403.52ms step:1573/3242 train_loss:3.6328 train_time:630699ms step_avg:403.52ms step:1574/3242 train_loss:3.5496 train_time:631094ms step_avg:403.51ms step:1575/3242 train_loss:3.2887 train_time:631489ms step_avg:403.51ms step:1576/3242 train_loss:3.4931 train_time:631883ms step_avg:403.50ms step:1577/3242 train_loss:3.4289 train_time:632278ms step_avg:403.50ms step:1578/3242 train_loss:3.4412 train_time:632672ms step_avg:403.49ms step:1579/3242 train_loss:3.5663 train_time:633068ms step_avg:403.48ms step:1580/3242 train_loss:3.4485 train_time:633462ms step_avg:403.48ms step:1581/3242 train_loss:3.4698 train_time:633857ms step_avg:403.47ms step:1582/3242 train_loss:3.4200 train_time:634252ms step_avg:403.47ms step:1583/3242 train_loss:3.3184 train_time:634646ms step_avg:403.46ms step:1584/3242 train_loss:3.3986 train_time:635041ms step_avg:403.46ms step:1585/3242 train_loss:3.4846 train_time:635436ms step_avg:403.45ms step:1586/3242 train_loss:3.4360 train_time:635831ms step_avg:403.45ms step:1587/3242 train_loss:3.9145 train_time:636226ms step_avg:403.44ms step:1588/3242 train_loss:3.4323 train_time:636621ms step_avg:403.44ms step:1589/3242 train_loss:3.5386 train_time:637016ms step_avg:403.43ms step:1590/3242 train_loss:3.2558 train_time:637411ms step_avg:403.42ms step:1591/3242 train_loss:3.6002 train_time:637807ms step_avg:403.42ms step:1592/3242 train_loss:3.4572 train_time:638202ms step_avg:403.41ms step:1593/3242 train_loss:3.5715 train_time:638595ms step_avg:403.41ms step:1594/3242 train_loss:3.4311 train_time:638988ms step_avg:403.40ms step:1595/3242 train_loss:3.4742 train_time:639383ms step_avg:403.40ms step:1596/3242 train_loss:3.4904 train_time:639777ms step_avg:403.39ms step:1597/3242 train_loss:3.4676 train_time:640170ms step_avg:403.38ms step:1598/3242 train_loss:3.5413 train_time:640566ms step_avg:403.38ms 
step:1599/3242 train_loss:3.8318 train_time:640961ms step_avg:403.37ms step:1600/3242 train_loss:3.3939 train_time:641357ms step_avg:403.37ms step:1601/3242 train_loss:3.4612 train_time:641752ms step_avg:403.36ms step:1602/3242 train_loss:3.5674 train_time:642147ms step_avg:403.36ms step:1603/3242 train_loss:3.5885 train_time:642544ms step_avg:403.35ms step:1604/3242 train_loss:3.4590 train_time:642940ms step_avg:403.35ms step:1605/3242 train_loss:3.8689 train_time:643337ms step_avg:403.35ms step:1606/3242 train_loss:3.5989 train_time:643731ms step_avg:403.34ms step:1607/3242 train_loss:3.2775 train_time:644126ms step_avg:403.34ms step:1608/3242 train_loss:3.4684 train_time:644521ms step_avg:403.33ms step:1609/3242 train_loss:3.3158 train_time:644916ms step_avg:403.32ms step:1610/3242 train_loss:3.6518 train_time:645312ms step_avg:403.32ms step:1611/3242 train_loss:3.4286 train_time:645706ms step_avg:403.31ms step:1612/3242 train_loss:3.4968 train_time:646102ms step_avg:403.31ms step:1613/3242 train_loss:3.5003 train_time:646497ms step_avg:403.30ms step:1614/3242 train_loss:3.8749 train_time:646892ms step_avg:403.30ms step:1615/3242 train_loss:3.6794 train_time:647288ms step_avg:403.29ms step:1616/3242 train_loss:3.5666 train_time:647683ms step_avg:403.29ms step:1617/3242 train_loss:3.4118 train_time:648079ms step_avg:403.29ms step:1618/3242 train_loss:3.4864 train_time:648476ms step_avg:403.28ms step:1619/3242 train_loss:3.3878 train_time:648871ms step_avg:403.28ms step:1620/3242 train_loss:3.4839 train_time:649267ms step_avg:403.27ms step:1621/3242 train_loss:3.6496 train_time:649663ms step_avg:403.27ms step:1622/3242 train_loss:3.6626 train_time:650060ms step_avg:403.26ms step:1623/3242 train_loss:3.3656 train_time:650456ms step_avg:403.26ms step:1624/3242 train_loss:3.5565 train_time:650851ms step_avg:403.25ms step:1625/3242 train_loss:3.5345 train_time:651246ms step_avg:403.25ms step:1625/3242 val_loss:3.4687 train_time:651273ms step_avg:403.26ms 
step:1626/3242 train_loss:3.3434 train_time:651642ms step_avg:403.24ms step:1627/3242 train_loss:3.5063 train_time:652036ms step_avg:403.24ms step:1628/3242 train_loss:3.3735 train_time:652431ms step_avg:403.23ms step:1629/3242 train_loss:3.4969 train_time:652826ms step_avg:403.23ms step:1630/3242 train_loss:3.4592 train_time:653221ms step_avg:403.22ms step:1631/3242 train_loss:3.4020 train_time:653616ms step_avg:403.22ms step:1632/3242 train_loss:3.4985 train_time:654011ms step_avg:403.21ms step:1633/3242 train_loss:3.4949 train_time:654405ms step_avg:403.21ms step:1634/3242 train_loss:3.4946 train_time:654799ms step_avg:403.20ms step:1635/3242 train_loss:3.3642 train_time:655193ms step_avg:403.20ms step:1636/3242 train_loss:3.2610 train_time:655588ms step_avg:403.19ms step:1637/3242 train_loss:3.4352 train_time:655983ms step_avg:403.19ms step:1638/3242 train_loss:3.4772 train_time:656380ms step_avg:403.18ms step:1639/3242 train_loss:3.3101 train_time:656775ms step_avg:403.18ms step:1640/3242 train_loss:3.5255 train_time:657171ms step_avg:403.17ms step:1641/3242 train_loss:3.6444 train_time:657567ms step_avg:403.17ms step:1642/3242 train_loss:3.6828 train_time:657962ms step_avg:403.16ms step:1643/3242 train_loss:4.0348 train_time:658357ms step_avg:403.16ms step:1644/3242 train_loss:3.5264 train_time:658752ms step_avg:403.15ms step:1645/3242 train_loss:3.5255 train_time:659148ms step_avg:403.15ms step:1646/3242 train_loss:3.6626 train_time:659544ms step_avg:403.14ms step:1647/3242 train_loss:3.6046 train_time:659940ms step_avg:403.14ms step:1648/3242 train_loss:3.3542 train_time:660336ms step_avg:403.14ms step:1649/3242 train_loss:3.4929 train_time:660733ms step_avg:403.13ms step:1650/3242 train_loss:3.1258 train_time:661129ms step_avg:403.13ms step:1651/3242 train_loss:3.4948 train_time:661526ms step_avg:403.12ms step:1652/3242 train_loss:3.4771 train_time:661922ms step_avg:403.12ms step:1653/3242 train_loss:3.4074 train_time:662317ms step_avg:403.11ms 
step:1654/3242 train_loss:3.3873 train_time:662711ms step_avg:403.11ms step:1655/3242 train_loss:3.4410 train_time:663107ms step_avg:403.10ms step:1656/3242 train_loss:3.4655 train_time:663502ms step_avg:403.10ms step:1657/3242 train_loss:3.6841 train_time:663897ms step_avg:403.09ms step:1658/3242 train_loss:3.3754 train_time:664293ms step_avg:403.09ms step:1659/3242 train_loss:3.4020 train_time:664688ms step_avg:403.09ms step:1660/3242 train_loss:3.4816 train_time:665083ms step_avg:403.08ms step:1661/3242 train_loss:3.4406 train_time:665478ms step_avg:403.08ms step:1662/3242 train_loss:3.3919 train_time:665873ms step_avg:403.07ms step:1663/3242 train_loss:3.4146 train_time:666268ms step_avg:403.07ms step:1664/3242 train_loss:3.4133 train_time:666664ms step_avg:403.06ms step:1665/3242 train_loss:3.5203 train_time:667060ms step_avg:403.06ms step:1666/3242 train_loss:3.4233 train_time:667456ms step_avg:403.05ms step:1667/3242 train_loss:3.4509 train_time:667852ms step_avg:403.05ms step:1668/3242 train_loss:3.5289 train_time:668248ms step_avg:403.04ms step:1669/3242 train_loss:3.4925 train_time:668643ms step_avg:403.04ms step:1670/3242 train_loss:3.8595 train_time:669037ms step_avg:403.03ms step:1671/3242 train_loss:3.2786 train_time:669433ms step_avg:403.03ms step:1672/3242 train_loss:3.4771 train_time:669829ms step_avg:403.03ms step:1673/3242 train_loss:3.3352 train_time:670225ms step_avg:403.02ms step:1674/3242 train_loss:3.2530 train_time:670620ms step_avg:403.02ms step:1675/3242 train_loss:3.6076 train_time:671015ms step_avg:403.01ms step:1676/3242 train_loss:3.7669 train_time:671411ms step_avg:403.01ms step:1677/3242 train_loss:3.4098 train_time:671805ms step_avg:403.00ms step:1678/3242 train_loss:3.3600 train_time:672200ms step_avg:403.00ms step:1679/3242 train_loss:3.3720 train_time:672594ms step_avg:402.99ms step:1680/3242 train_loss:3.5767 train_time:672989ms step_avg:402.99ms step:1681/3242 train_loss:3.3931 train_time:673385ms step_avg:402.98ms 
step:1682/3242 train_loss:3.4644 train_time:673780ms step_avg:402.98ms step:1683/3242 train_loss:3.4626 train_time:674176ms step_avg:402.97ms step:1684/3242 train_loss:3.0348 train_time:674572ms step_avg:402.97ms step:1685/3242 train_loss:3.4559 train_time:674966ms step_avg:402.96ms step:1686/3242 train_loss:3.5482 train_time:675362ms step_avg:402.96ms step:1687/3242 train_loss:3.5683 train_time:675757ms step_avg:402.96ms step:1688/3242 train_loss:3.5216 train_time:676152ms step_avg:402.95ms step:1689/3242 train_loss:3.3781 train_time:676547ms step_avg:402.95ms step:1690/3242 train_loss:3.5235 train_time:676943ms step_avg:402.94ms step:1691/3242 train_loss:3.3909 train_time:677339ms step_avg:402.94ms step:1692/3242 train_loss:3.3735 train_time:677733ms step_avg:402.93ms step:1693/3242 train_loss:3.4285 train_time:678128ms step_avg:402.93ms step:1694/3242 train_loss:3.2963 train_time:678523ms step_avg:402.92ms step:1695/3242 train_loss:3.4789 train_time:678918ms step_avg:402.92ms step:1696/3242 train_loss:3.4334 train_time:679312ms step_avg:402.91ms step:1697/3242 train_loss:3.3838 train_time:679706ms step_avg:402.91ms step:1698/3242 train_loss:3.9344 train_time:680101ms step_avg:402.90ms step:1699/3242 train_loss:3.6347 train_time:680495ms step_avg:402.90ms step:1700/3242 train_loss:3.4875 train_time:680890ms step_avg:402.89ms step:1701/3242 train_loss:3.4045 train_time:681284ms step_avg:402.89ms step:1702/3242 train_loss:3.1962 train_time:681679ms step_avg:402.88ms step:1703/3242 train_loss:3.4471 train_time:682072ms step_avg:402.88ms step:1704/3242 train_loss:3.4928 train_time:682466ms step_avg:402.87ms step:1705/3242 train_loss:3.4759 train_time:682861ms step_avg:402.87ms step:1706/3242 train_loss:3.2726 train_time:683256ms step_avg:402.86ms step:1707/3242 train_loss:3.7268 train_time:683650ms step_avg:402.86ms step:1708/3242 train_loss:3.3807 train_time:684046ms step_avg:402.85ms step:1709/3242 train_loss:3.9267 train_time:684441ms step_avg:402.85ms 
step:1710/3242 train_loss:3.3510 train_time:685029ms step_avg:402.96ms step:1711/3242 train_loss:3.2321 train_time:685432ms step_avg:402.96ms step:1712/3242 train_loss:3.5824 train_time:685827ms step_avg:402.95ms step:1713/3242 train_loss:3.3367 train_time:686223ms step_avg:402.95ms step:1714/3242 train_loss:3.3508 train_time:686620ms step_avg:402.95ms step:1715/3242 train_loss:3.3958 train_time:687211ms step_avg:403.06ms step:1716/3242 train_loss:3.6968 train_time:687605ms step_avg:403.05ms step:1717/3242 train_loss:3.4431 train_time:688000ms step_avg:403.05ms step:1718/3242 train_loss:3.4560 train_time:688395ms step_avg:403.04ms step:1719/3242 train_loss:3.5536 train_time:688790ms step_avg:403.04ms step:1720/3242 train_loss:3.4458 train_time:689186ms step_avg:403.03ms step:1721/3242 train_loss:3.6028 train_time:689581ms step_avg:403.03ms step:1722/3242 train_loss:3.3928 train_time:689975ms step_avg:403.02ms step:1723/3242 train_loss:3.3747 train_time:690371ms step_avg:403.02ms step:1724/3242 train_loss:3.4797 train_time:690768ms step_avg:403.02ms step:1725/3242 train_loss:3.1337 train_time:691164ms step_avg:403.01ms step:1726/3242 train_loss:3.3193 train_time:691561ms step_avg:403.01ms step:1727/3242 train_loss:3.3903 train_time:691957ms step_avg:403.00ms step:1728/3242 train_loss:3.4502 train_time:692351ms step_avg:403.00ms step:1729/3242 train_loss:3.5461 train_time:692745ms step_avg:402.99ms step:1730/3242 train_loss:3.5484 train_time:693141ms step_avg:402.99ms step:1731/3242 train_loss:3.5236 train_time:693535ms step_avg:402.98ms step:1732/3242 train_loss:3.3829 train_time:693930ms step_avg:402.98ms step:1733/3242 train_loss:3.3877 train_time:694325ms step_avg:402.97ms step:1734/3242 train_loss:3.3125 train_time:694722ms step_avg:402.97ms step:1735/3242 train_loss:3.3622 train_time:695116ms step_avg:402.97ms step:1736/3242 train_loss:3.3962 train_time:695513ms step_avg:402.96ms step:1737/3242 train_loss:3.3966 train_time:695907ms step_avg:402.96ms 
step:1738/3242 train_loss:3.3596 train_time:696302ms step_avg:402.95ms step:1739/3242 train_loss:3.7263 train_time:696697ms step_avg:402.95ms step:1740/3242 train_loss:3.5875 train_time:697092ms step_avg:402.94ms step:1741/3242 train_loss:3.5472 train_time:697489ms step_avg:402.94ms step:1742/3242 train_loss:3.3856 train_time:697886ms step_avg:402.94ms step:1743/3242 train_loss:3.7006 train_time:698281ms step_avg:402.93ms step:1744/3242 train_loss:3.4536 train_time:698677ms step_avg:402.93ms step:1745/3242 train_loss:3.5134 train_time:699072ms step_avg:402.92ms step:1746/3242 train_loss:3.5452 train_time:699467ms step_avg:402.92ms step:1747/3242 train_loss:3.3870 train_time:699862ms step_avg:402.91ms step:1748/3242 train_loss:3.3344 train_time:700256ms step_avg:402.91ms step:1749/3242 train_loss:3.4152 train_time:700650ms step_avg:402.90ms step:1750/3242 train_loss:3.4904 train_time:701045ms step_avg:402.90ms step:1750/3242 val_loss:3.4504 train_time:701071ms step_avg:402.91ms step:1751/3242 train_loss:3.5759 train_time:701442ms step_avg:402.90ms step:1752/3242 train_loss:3.4334 train_time:701838ms step_avg:402.89ms step:1753/3242 train_loss:3.4694 train_time:702232ms step_avg:402.89ms step:1754/3242 train_loss:3.4754 train_time:702626ms step_avg:402.88ms step:1755/3242 train_loss:3.5028 train_time:703021ms step_avg:402.88ms step:1756/3242 train_loss:3.5292 train_time:703416ms step_avg:402.87ms step:1757/3242 train_loss:3.6790 train_time:703810ms step_avg:402.87ms step:1758/3242 train_loss:3.4689 train_time:704204ms step_avg:402.86ms step:1759/3242 train_loss:3.5238 train_time:704598ms step_avg:402.86ms step:1760/3242 train_loss:3.4415 train_time:704991ms step_avg:402.85ms step:1761/3242 train_loss:3.3341 train_time:705387ms step_avg:402.85ms step:1762/3242 train_loss:3.5629 train_time:705782ms step_avg:402.84ms step:1763/3242 train_loss:3.3640 train_time:706177ms step_avg:402.84ms step:1764/3242 train_loss:3.5482 train_time:706572ms step_avg:402.83ms 
step:1765/3242 train_loss:3.3757 train_time:706967ms step_avg:402.83ms step:1766/3242 train_loss:3.6087 train_time:707363ms step_avg:402.83ms step:1767/3242 train_loss:3.4354 train_time:707757ms step_avg:402.82ms step:1768/3242 train_loss:3.4375 train_time:708152ms step_avg:402.82ms step:1769/3242 train_loss:3.4249 train_time:708548ms step_avg:402.81ms step:1770/3242 train_loss:3.3881 train_time:708944ms step_avg:402.81ms step:1771/3242 train_loss:3.4595 train_time:709338ms step_avg:402.80ms step:1772/3242 train_loss:3.4781 train_time:709733ms step_avg:402.80ms step:1773/3242 train_loss:3.4170 train_time:710127ms step_avg:402.79ms step:1774/3242 train_loss:3.4250 train_time:710522ms step_avg:402.79ms step:1775/3242 train_loss:3.4007 train_time:710916ms step_avg:402.79ms step:1776/3242 train_loss:3.5400 train_time:711310ms step_avg:402.78ms step:1777/3242 train_loss:3.6599 train_time:711704ms step_avg:402.78ms step:1778/3242 train_loss:3.3950 train_time:712098ms step_avg:402.77ms step:1779/3242 train_loss:3.4949 train_time:712493ms step_avg:402.77ms step:1780/3242 train_loss:3.5552 train_time:712888ms step_avg:402.76ms step:1781/3242 train_loss:3.3979 train_time:713283ms step_avg:402.76ms step:1782/3242 train_loss:3.3991 train_time:713677ms step_avg:402.75ms step:1783/3242 train_loss:3.4458 train_time:714072ms step_avg:402.75ms step:1784/3242 train_loss:3.5432 train_time:714468ms step_avg:402.74ms step:1785/3242 train_loss:3.5250 train_time:714864ms step_avg:402.74ms step:1786/3242 train_loss:3.3355 train_time:715258ms step_avg:402.74ms step:1787/3242 train_loss:3.5919 train_time:715652ms step_avg:402.73ms step:1788/3242 train_loss:3.6365 train_time:716047ms step_avg:402.73ms step:1789/3242 train_loss:3.4262 train_time:716442ms step_avg:402.72ms step:1790/3242 train_loss:3.5333 train_time:716836ms step_avg:402.72ms step:1791/3242 train_loss:3.5037 train_time:717230ms step_avg:402.71ms step:1792/3242 train_loss:3.3349 train_time:717626ms step_avg:402.71ms 
step:1793/3242 train_loss:3.4413 train_time:718020ms step_avg:402.70ms step:1794/3242 train_loss:3.5146 train_time:718415ms step_avg:402.70ms step:1795/3242 train_loss:3.5290 train_time:718810ms step_avg:402.69ms step:1796/3242 train_loss:3.3773 train_time:719205ms step_avg:402.69ms step:1797/3242 train_loss:3.3137 train_time:719599ms step_avg:402.69ms step:1798/3242 train_loss:3.5805 train_time:719993ms step_avg:402.68ms step:1799/3242 train_loss:3.5023 train_time:720387ms step_avg:402.68ms step:1800/3242 train_loss:3.6751 train_time:720783ms step_avg:402.67ms step:1801/3242 train_loss:3.4708 train_time:721177ms step_avg:402.67ms step:1802/3242 train_loss:3.6182 train_time:721572ms step_avg:402.66ms step:1803/3242 train_loss:3.5161 train_time:721967ms step_avg:402.66ms step:1804/3242 train_loss:3.2731 train_time:722363ms step_avg:402.65ms step:1805/3242 train_loss:3.3316 train_time:722758ms step_avg:402.65ms step:1806/3242 train_loss:3.4594 train_time:723152ms step_avg:402.65ms step:1807/3242 train_loss:3.2868 train_time:723547ms step_avg:402.64ms step:1808/3242 train_loss:3.5225 train_time:723941ms step_avg:402.64ms step:1809/3242 train_loss:3.3962 train_time:724335ms step_avg:402.63ms step:1810/3242 train_loss:3.5057 train_time:724730ms step_avg:402.63ms step:1811/3242 train_loss:3.4683 train_time:725125ms step_avg:402.62ms step:1812/3242 train_loss:3.3789 train_time:725520ms step_avg:402.62ms step:1813/3242 train_loss:3.4951 train_time:725915ms step_avg:402.62ms step:1814/3242 train_loss:3.3060 train_time:726310ms step_avg:402.61ms step:1815/3242 train_loss:3.4353 train_time:726704ms step_avg:402.61ms step:1816/3242 train_loss:3.6229 train_time:727099ms step_avg:402.60ms step:1817/3242 train_loss:3.3601 train_time:727493ms step_avg:402.60ms step:1818/3242 train_loss:3.5546 train_time:727888ms step_avg:402.59ms step:1819/3242 train_loss:3.3695 train_time:728283ms step_avg:402.59ms step:1820/3242 train_loss:3.5097 train_time:728677ms step_avg:402.58ms 
step:1821/3242 train_loss:3.4932 train_time:729073ms step_avg:402.58ms step:1822/3242 train_loss:3.6902 train_time:729469ms step_avg:402.58ms step:1823/3242 train_loss:3.4938 train_time:729864ms step_avg:402.57ms step:1824/3242 train_loss:3.3987 train_time:730259ms step_avg:402.57ms step:1825/3242 train_loss:3.4187 train_time:730654ms step_avg:402.56ms step:1826/3242 train_loss:3.3036 train_time:731049ms step_avg:402.56ms step:1827/3242 train_loss:3.4056 train_time:731444ms step_avg:402.56ms step:1828/3242 train_loss:3.5410 train_time:731839ms step_avg:402.55ms step:1829/3242 train_loss:3.3016 train_time:732234ms step_avg:402.55ms step:1830/3242 train_loss:4.3105 train_time:732630ms step_avg:402.54ms step:1831/3242 train_loss:3.3872 train_time:733024ms step_avg:402.54ms step:1832/3242 train_loss:3.2682 train_time:733419ms step_avg:402.54ms step:1833/3242 train_loss:3.4584 train_time:733814ms step_avg:402.53ms step:1834/3242 train_loss:3.3891 train_time:734209ms step_avg:402.53ms step:1835/3242 train_loss:3.5043 train_time:734604ms step_avg:402.52ms step:1836/3242 train_loss:3.5237 train_time:734998ms step_avg:402.52ms step:1837/3242 train_loss:3.4597 train_time:735393ms step_avg:402.51ms step:1838/3242 train_loss:3.3377 train_time:735787ms step_avg:402.51ms step:1839/3242 train_loss:3.5337 train_time:736182ms step_avg:402.51ms step:1840/3242 train_loss:3.3822 train_time:736577ms step_avg:402.50ms step:1841/3242 train_loss:3.5185 train_time:736970ms step_avg:402.50ms step:1842/3242 train_loss:3.4442 train_time:737366ms step_avg:402.49ms step:1843/3242 train_loss:3.2916 train_time:737761ms step_avg:402.49ms step:1844/3242 train_loss:3.5958 train_time:738156ms step_avg:402.48ms step:1845/3242 train_loss:3.4692 train_time:738551ms step_avg:402.48ms step:1846/3242 train_loss:3.4532 train_time:738948ms step_avg:402.48ms step:1847/3242 train_loss:3.5046 train_time:739344ms step_avg:402.47ms step:1848/3242 train_loss:3.2673 train_time:739739ms step_avg:402.47ms 
step:1849/3242 train_loss:3.2404 train_time:740134ms step_avg:402.47ms step:1850/3242 train_loss:3.4039 train_time:740530ms step_avg:402.46ms step:1851/3242 train_loss:3.4597 train_time:740925ms step_avg:402.46ms step:1852/3242 train_loss:3.5600 train_time:741320ms step_avg:402.45ms step:1853/3242 train_loss:3.4213 train_time:741715ms step_avg:402.45ms step:1854/3242 train_loss:3.5117 train_time:742111ms step_avg:402.45ms step:1855/3242 train_loss:3.5467 train_time:742506ms step_avg:402.44ms step:1856/3242 train_loss:3.4671 train_time:742902ms step_avg:402.44ms step:1857/3242 train_loss:3.4880 train_time:743297ms step_avg:402.43ms step:1858/3242 train_loss:3.5848 train_time:743691ms step_avg:402.43ms step:1859/3242 train_loss:3.2774 train_time:744085ms step_avg:402.43ms step:1860/3242 train_loss:3.3450 train_time:744479ms step_avg:402.42ms step:1861/3242 train_loss:3.3711 train_time:744873ms step_avg:402.42ms step:1862/3242 train_loss:3.3935 train_time:745268ms step_avg:402.41ms step:1863/3242 train_loss:3.4098 train_time:745664ms step_avg:402.41ms step:1864/3242 train_loss:3.3675 train_time:746058ms step_avg:402.40ms step:1865/3242 train_loss:3.4789 train_time:746453ms step_avg:402.40ms step:1866/3242 train_loss:3.4218 train_time:746847ms step_avg:402.40ms step:1867/3242 train_loss:3.4646 train_time:747241ms step_avg:402.39ms step:1868/3242 train_loss:3.6200 train_time:747635ms step_avg:402.39ms step:1869/3242 train_loss:3.2725 train_time:748030ms step_avg:402.38ms step:1870/3242 train_loss:3.4900 train_time:748423ms step_avg:402.38ms step:1871/3242 train_loss:3.4326 train_time:748817ms step_avg:402.37ms step:1872/3242 train_loss:3.3588 train_time:749212ms step_avg:402.37ms step:1873/3242 train_loss:3.3751 train_time:749607ms step_avg:402.37ms step:1874/3242 train_loss:3.3469 train_time:750001ms step_avg:402.36ms step:1875/3242 train_loss:3.3500 train_time:750395ms step_avg:402.36ms step:1875/3242 val_loss:3.4356 train_time:750420ms step_avg:402.37ms 
step:1876/3242 train_loss:3.4515 train_time:750789ms step_avg:402.35ms step:1877/3242 train_loss:3.2321 train_time:751183ms step_avg:402.35ms step:1878/3242 train_loss:3.3677 train_time:751578ms step_avg:402.34ms step:1879/3242 train_loss:3.4583 train_time:751972ms step_avg:402.34ms step:1880/3242 train_loss:3.8535 train_time:752366ms step_avg:402.33ms step:1881/3242 train_loss:3.4405 train_time:752761ms step_avg:402.33ms step:1882/3242 train_loss:3.3626 train_time:753155ms step_avg:402.33ms step:1883/3242 train_loss:3.5307 train_time:753549ms step_avg:402.32ms step:1884/3242 train_loss:3.4438 train_time:753943ms step_avg:402.32ms step:1885/3242 train_loss:3.4059 train_time:754337ms step_avg:402.31ms step:1886/3242 train_loss:3.4625 train_time:754731ms step_avg:402.31ms step:1887/3242 train_loss:3.2548 train_time:755125ms step_avg:402.30ms step:1888/3242 train_loss:3.4547 train_time:755520ms step_avg:402.30ms step:1889/3242 train_loss:3.7765 train_time:755914ms step_avg:402.30ms step:1890/3242 train_loss:3.4773 train_time:756307ms step_avg:402.29ms step:1891/3242 train_loss:3.4280 train_time:756703ms step_avg:402.29ms step:1892/3242 train_loss:3.3710 train_time:757097ms step_avg:402.28ms step:1893/3242 train_loss:3.2447 train_time:757492ms step_avg:402.28ms step:1894/3242 train_loss:3.3336 train_time:757886ms step_avg:402.27ms step:1895/3242 train_loss:3.3779 train_time:758280ms step_avg:402.27ms step:1896/3242 train_loss:3.5427 train_time:758675ms step_avg:402.27ms step:1897/3242 train_loss:3.4546 train_time:759070ms step_avg:402.26ms step:1898/3242 train_loss:3.6551 train_time:759466ms step_avg:402.26ms step:1899/3242 train_loss:3.5379 train_time:759861ms step_avg:402.26ms step:1900/3242 train_loss:3.5293 train_time:760445ms step_avg:402.35ms step:1901/3242 train_loss:3.7367 train_time:760848ms step_avg:402.35ms step:1902/3242 train_loss:3.4821 train_time:761243ms step_avg:402.35ms step:1903/3242 train_loss:3.3650 train_time:761638ms step_avg:402.34ms 
step:1904/3242 train_loss:3.4067 train_time:762033ms step_avg:402.34ms step:1905/3242 train_loss:3.3967 train_time:762620ms step_avg:402.44ms step:1906/3242 train_loss:3.3646 train_time:763015ms step_avg:402.43ms step:1907/3242 train_loss:3.4765 train_time:763410ms step_avg:402.43ms step:1908/3242 train_loss:3.3625 train_time:763804ms step_avg:402.43ms step:1909/3242 train_loss:3.5752 train_time:764200ms step_avg:402.42ms step:1910/3242 train_loss:3.4244 train_time:764594ms step_avg:402.42ms step:1911/3242 train_loss:3.4567 train_time:764988ms step_avg:402.41ms step:1912/3242 train_loss:3.4706 train_time:765383ms step_avg:402.41ms step:1913/3242 train_loss:3.2630 train_time:765778ms step_avg:402.41ms step:1914/3242 train_loss:3.4427 train_time:766174ms step_avg:402.40ms step:1915/3242 train_loss:3.4938 train_time:766569ms step_avg:402.40ms step:1916/3242 train_loss:3.4104 train_time:766964ms step_avg:402.39ms step:1917/3242 train_loss:3.3797 train_time:767359ms step_avg:402.39ms step:1918/3242 train_loss:2.9229 train_time:767755ms step_avg:402.39ms step:1919/3242 train_loss:3.3166 train_time:768149ms step_avg:402.38ms step:1920/3242 train_loss:3.5585 train_time:768543ms step_avg:402.38ms step:1921/3242 train_loss:3.6181 train_time:768938ms step_avg:402.37ms step:1922/3242 train_loss:3.4487 train_time:769333ms step_avg:402.37ms step:1923/3242 train_loss:3.4142 train_time:769727ms step_avg:402.37ms step:1924/3242 train_loss:3.6653 train_time:770122ms step_avg:402.36ms step:1925/3242 train_loss:3.3694 train_time:770516ms step_avg:402.36ms step:1926/3242 train_loss:3.4292 train_time:770910ms step_avg:402.35ms step:1927/3242 train_loss:3.4491 train_time:771305ms step_avg:402.35ms step:1928/3242 train_loss:3.5103 train_time:771701ms step_avg:402.35ms step:1929/3242 train_loss:3.1541 train_time:772097ms step_avg:402.34ms step:1930/3242 train_loss:3.3236 train_time:772492ms step_avg:402.34ms step:1931/3242 train_loss:3.2856 train_time:772887ms step_avg:402.34ms 
step:1932/3242 train_loss:3.3632 train_time:773282ms step_avg:402.33ms step:1933/3242 train_loss:3.1451 train_time:773676ms step_avg:402.33ms step:1934/3242 train_loss:3.3567 train_time:774070ms step_avg:402.32ms step:1935/3242 train_loss:3.6032 train_time:774465ms step_avg:402.32ms step:1936/3242 train_loss:3.4628 train_time:774858ms step_avg:402.31ms step:1937/3242 train_loss:3.5586 train_time:775253ms step_avg:402.31ms step:1938/3242 train_loss:3.3866 train_time:775647ms step_avg:402.31ms step:1939/3242 train_loss:3.3899 train_time:776041ms step_avg:402.30ms step:1940/3242 train_loss:3.4775 train_time:776435ms step_avg:402.30ms step:1941/3242 train_loss:3.3984 train_time:776829ms step_avg:402.29ms step:1942/3242 train_loss:3.5046 train_time:777225ms step_avg:402.29ms step:1943/3242 train_loss:3.3563 train_time:777620ms step_avg:402.29ms step:1944/3242 train_loss:3.5353 train_time:778016ms step_avg:402.28ms step:1945/3242 train_loss:3.6862 train_time:778411ms step_avg:402.28ms step:1946/3242 train_loss:3.4446 train_time:778805ms step_avg:402.28ms step:1947/3242 train_loss:3.4016 train_time:779201ms step_avg:402.27ms step:1948/3242 train_loss:3.6014 train_time:779596ms step_avg:402.27ms step:1949/3242 train_loss:3.2891 train_time:779991ms step_avg:402.26ms step:1950/3242 train_loss:3.4636 train_time:780386ms step_avg:402.26ms step:1951/3242 train_loss:3.4431 train_time:780780ms step_avg:402.26ms step:1952/3242 train_loss:3.3335 train_time:781175ms step_avg:402.25ms step:1953/3242 train_loss:3.3632 train_time:781570ms step_avg:402.25ms step:1954/3242 train_loss:3.3184 train_time:781965ms step_avg:402.25ms step:1955/3242 train_loss:3.3454 train_time:782360ms step_avg:402.24ms step:1956/3242 train_loss:3.3437 train_time:782756ms step_avg:402.24ms step:1957/3242 train_loss:3.5645 train_time:783151ms step_avg:402.23ms step:1958/3242 train_loss:3.5561 train_time:783546ms step_avg:402.23ms step:1959/3242 train_loss:3.4132 train_time:783941ms step_avg:402.23ms 
step:1960/3242 train_loss:3.2891 train_time:784336ms step_avg:402.22ms step:1961/3242 train_loss:3.2445 train_time:784731ms step_avg:402.22ms step:1962/3242 train_loss:3.6616 train_time:785127ms step_avg:402.22ms step:1963/3242 train_loss:3.4414 train_time:785522ms step_avg:402.21ms step:1964/3242 train_loss:3.5536 train_time:785917ms step_avg:402.21ms step:1965/3242 train_loss:3.3349 train_time:786313ms step_avg:402.21ms step:1966/3242 train_loss:3.5153 train_time:786708ms step_avg:402.20ms step:1967/3242 train_loss:3.5093 train_time:787104ms step_avg:402.20ms step:1968/3242 train_loss:3.4375 train_time:787656ms step_avg:402.28ms step:1969/3242 train_loss:3.3656 train_time:788051ms step_avg:402.27ms step:1970/3242 train_loss:3.6330 train_time:788447ms step_avg:402.27ms step:1971/3242 train_loss:3.3511 train_time:788844ms step_avg:402.27ms step:1972/3242 train_loss:3.3942 train_time:789239ms step_avg:402.26ms step:1973/3242 train_loss:3.4630 train_time:789634ms step_avg:402.26ms step:1974/3242 train_loss:3.2711 train_time:790029ms step_avg:402.25ms step:1975/3242 train_loss:3.0535 train_time:790424ms step_avg:402.25ms step:1976/3242 train_loss:3.1197 train_time:790820ms step_avg:402.25ms step:1977/3242 train_loss:3.4244 train_time:791215ms step_avg:402.24ms step:1978/3242 train_loss:3.3362 train_time:791611ms step_avg:402.24ms step:1979/3242 train_loss:3.2408 train_time:792006ms step_avg:402.24ms step:1980/3242 train_loss:3.5087 train_time:792402ms step_avg:402.23ms step:1981/3242 train_loss:3.3718 train_time:792796ms step_avg:402.23ms step:1982/3242 train_loss:3.9570 train_time:793191ms step_avg:402.23ms step:1983/3242 train_loss:3.3851 train_time:793586ms step_avg:402.22ms step:1984/3242 train_loss:3.4107 train_time:793980ms step_avg:402.22ms step:1985/3242 train_loss:3.4261 train_time:794375ms step_avg:402.22ms step:1986/3242 train_loss:3.4706 train_time:794770ms step_avg:402.21ms step:1987/3242 train_loss:3.4270 train_time:795165ms step_avg:402.21ms 
step:1988/3242 train_loss:3.3114 train_time:795561ms step_avg:402.20ms step:1989/3242 train_loss:3.5195 train_time:795957ms step_avg:402.20ms step:1990/3242 train_loss:3.5672 train_time:796354ms step_avg:402.20ms step:1991/3242 train_loss:3.3185 train_time:796750ms step_avg:402.20ms step:1992/3242 train_loss:3.3846 train_time:797146ms step_avg:402.19ms step:1993/3242 train_loss:3.4620 train_time:797541ms step_avg:402.19ms step:1994/3242 train_loss:3.5337 train_time:797937ms step_avg:402.19ms step:1995/3242 train_loss:3.3282 train_time:798332ms step_avg:402.18ms step:1996/3242 train_loss:3.3066 train_time:798728ms step_avg:402.18ms step:1997/3242 train_loss:3.6259 train_time:799124ms step_avg:402.18ms step:1998/3242 train_loss:3.3286 train_time:799520ms step_avg:402.17ms step:1999/3242 train_loss:3.3611 train_time:799915ms step_avg:402.17ms step:2000/3242 train_loss:3.4435 train_time:800311ms step_avg:402.17ms step:2000/3242 val_loss:3.4216 train_time:800337ms step_avg:402.18ms step:2001/3242 train_loss:3.3642 train_time:800707ms step_avg:402.16ms step:2002/3242 train_loss:3.3394 train_time:801103ms step_avg:402.16ms step:2003/3242 train_loss:3.3263 train_time:801498ms step_avg:402.16ms step:2004/3242 train_loss:3.4152 train_time:801894ms step_avg:402.15ms step:2005/3242 train_loss:3.5209 train_time:802288ms step_avg:402.15ms step:2006/3242 train_loss:3.4381 train_time:802683ms step_avg:402.15ms step:2007/3242 train_loss:3.7896 train_time:803080ms step_avg:402.14ms step:2008/3242 train_loss:3.4956 train_time:803475ms step_avg:402.14ms step:2009/3242 train_loss:3.3246 train_time:803869ms step_avg:402.14ms step:2010/3242 train_loss:3.3902 train_time:804263ms step_avg:402.13ms step:2011/3242 train_loss:3.2204 train_time:804657ms step_avg:402.13ms step:2012/3242 train_loss:3.5246 train_time:805051ms step_avg:402.12ms step:2013/3242 train_loss:3.4296 train_time:805446ms step_avg:402.12ms step:2014/3242 train_loss:3.4104 train_time:805841ms step_avg:402.12ms 
step:2015/3242 train_loss:3.4502 train_time:806236ms step_avg:402.11ms step:2016/3242 train_loss:3.5010 train_time:806632ms step_avg:402.11ms step:2017/3242 train_loss:3.3326 train_time:807027ms step_avg:402.11ms step:2018/3242 train_loss:3.4333 train_time:807422ms step_avg:402.10ms step:2019/3242 train_loss:3.3693 train_time:807818ms step_avg:402.10ms step:2020/3242 train_loss:3.3664 train_time:808214ms step_avg:402.10ms step:2021/3242 train_loss:3.6677 train_time:808609ms step_avg:402.09ms step:2022/3242 train_loss:3.3497 train_time:809003ms step_avg:402.09ms step:2023/3242 train_loss:3.6226 train_time:809398ms step_avg:402.09ms step:2024/3242 train_loss:3.3823 train_time:809975ms step_avg:402.17ms step:2025/3242 train_loss:3.1192 train_time:810370ms step_avg:402.17ms step:2026/3242 train_loss:3.3735 train_time:810764ms step_avg:402.16ms step:2027/3242 train_loss:3.3748 train_time:811159ms step_avg:402.16ms step:2028/3242 train_loss:3.4471 train_time:811554ms step_avg:402.16ms step:2029/3242 train_loss:3.5537 train_time:811950ms step_avg:402.15ms step:2030/3242 train_loss:3.3644 train_time:812345ms step_avg:402.15ms step:2031/3242 train_loss:2.8444 train_time:812740ms step_avg:402.15ms step:2032/3242 train_loss:3.3348 train_time:813135ms step_avg:402.14ms step:2033/3242 train_loss:3.4909 train_time:813532ms step_avg:402.14ms step:2034/3242 train_loss:3.3683 train_time:814078ms step_avg:402.21ms step:2035/3242 train_loss:3.4703 train_time:814473ms step_avg:402.21ms step:2036/3242 train_loss:3.2512 train_time:814869ms step_avg:402.21ms step:2037/3242 train_loss:3.4847 train_time:815264ms step_avg:402.20ms step:2038/3242 train_loss:3.2920 train_time:815659ms step_avg:402.20ms step:2039/3242 train_loss:3.4636 train_time:816054ms step_avg:402.20ms step:2040/3242 train_loss:3.4959 train_time:816449ms step_avg:402.19ms step:2041/3242 train_loss:3.5136 train_time:816846ms step_avg:402.19ms step:2042/3242 train_loss:3.3057 train_time:817242ms step_avg:402.19ms 
step:2043/3242 train_loss:3.6442 train_time:817637ms step_avg:402.18ms step:2044/3242 train_loss:3.4683 train_time:818031ms step_avg:402.18ms step:2045/3242 train_loss:3.5138 train_time:818425ms step_avg:402.17ms step:2046/3242 train_loss:3.3302 train_time:818952ms step_avg:402.24ms step:2047/3242 train_loss:3.4073 train_time:819346ms step_avg:402.23ms step:2048/3242 train_loss:3.4794 train_time:819740ms step_avg:402.23ms step:2049/3242 train_loss:3.4406 train_time:820135ms step_avg:402.22ms step:2050/3242 train_loss:3.1993 train_time:820684ms step_avg:402.30ms step:2051/3242 train_loss:3.2742 train_time:821086ms step_avg:402.30ms step:2052/3242 train_loss:3.4332 train_time:821480ms step_avg:402.29ms step:2053/3242 train_loss:3.3478 train_time:821875ms step_avg:402.29ms step:2054/3242 train_loss:3.2653 train_time:822269ms step_avg:402.28ms step:2055/3242 train_loss:3.5288 train_time:822664ms step_avg:402.28ms step:2056/3242 train_loss:3.4601 train_time:823058ms step_avg:402.28ms step:2057/3242 train_loss:3.3869 train_time:823452ms step_avg:402.27ms step:2058/3242 train_loss:3.4000 train_time:823846ms step_avg:402.27ms step:2059/3242 train_loss:3.4426 train_time:824241ms step_avg:402.26ms step:2060/3242 train_loss:3.3588 train_time:824636ms step_avg:402.26ms step:2061/3242 train_loss:3.6991 train_time:825032ms step_avg:402.26ms step:2062/3242 train_loss:3.6308 train_time:825427ms step_avg:402.26ms step:2063/3242 train_loss:3.3909 train_time:825822ms step_avg:402.25ms step:2064/3242 train_loss:3.5304 train_time:826217ms step_avg:402.25ms step:2065/3242 train_loss:3.4856 train_time:826612ms step_avg:402.24ms step:2066/3242 train_loss:3.4959 train_time:827007ms step_avg:402.24ms step:2067/3242 train_loss:3.4247 train_time:827403ms step_avg:402.24ms step:2068/3242 train_loss:3.7219 train_time:827797ms step_avg:402.23ms step:2069/3242 train_loss:3.4707 train_time:828192ms step_avg:402.23ms step:2070/3242 train_loss:3.5412 train_time:828588ms step_avg:402.23ms 
step:2071/3242 train_loss:3.3752 train_time:828982ms step_avg:402.22ms step:2072/3242 train_loss:3.3172 train_time:829377ms step_avg:402.22ms step:2073/3242 train_loss:3.3703 train_time:829772ms step_avg:402.22ms step:2074/3242 train_loss:3.6200 train_time:830167ms step_avg:402.21ms step:2075/3242 train_loss:3.5743 train_time:830562ms step_avg:402.21ms step:2076/3242 train_loss:3.4675 train_time:830958ms step_avg:402.21ms step:2077/3242 train_loss:3.4303 train_time:831353ms step_avg:402.20ms step:2078/3242 train_loss:3.4493 train_time:831748ms step_avg:402.20ms step:2079/3242 train_loss:3.4296 train_time:832142ms step_avg:402.20ms step:2080/3242 train_loss:3.1817 train_time:832536ms step_avg:402.19ms step:2081/3242 train_loss:3.5738 train_time:832931ms step_avg:402.19ms step:2082/3242 train_loss:3.2571 train_time:833325ms step_avg:402.18ms step:2083/3242 train_loss:3.2574 train_time:833721ms step_avg:402.18ms step:2084/3242 train_loss:3.3387 train_time:834117ms step_avg:402.18ms step:2085/3242 train_loss:3.3979 train_time:834513ms step_avg:402.17ms step:2086/3242 train_loss:3.2783 train_time:834908ms step_avg:402.17ms step:2087/3242 train_loss:3.5203 train_time:835303ms step_avg:402.17ms step:2088/3242 train_loss:3.3257 train_time:835699ms step_avg:402.17ms step:2089/3242 train_loss:3.2771 train_time:836094ms step_avg:402.16ms step:2090/3242 train_loss:3.4524 train_time:836673ms step_avg:402.25ms step:2091/3242 train_loss:3.5073 train_time:837229ms step_avg:402.32ms step:2092/3242 train_loss:3.4258 train_time:837623ms step_avg:402.32ms step:2093/3242 train_loss:3.6860 train_time:838018ms step_avg:402.31ms step:2094/3242 train_loss:3.5203 train_time:838414ms step_avg:402.31ms step:2095/3242 train_loss:3.5274 train_time:838812ms step_avg:402.31ms step:2096/3242 train_loss:3.4432 train_time:839398ms step_avg:402.40ms step:2097/3242 train_loss:3.3702 train_time:839794ms step_avg:402.39ms step:2098/3242 train_loss:3.5402 train_time:840190ms step_avg:402.39ms 
step:2099/3242 train_loss:3.3713 train_time:840586ms step_avg:402.39ms step:2100/3242 train_loss:3.3371 train_time:840982ms step_avg:402.38ms step:2101/3242 train_loss:2.9688 train_time:841378ms step_avg:402.38ms step:2102/3242 train_loss:3.4813 train_time:841772ms step_avg:402.38ms step:2103/3242 train_loss:3.4400 train_time:842167ms step_avg:402.37ms step:2104/3242 train_loss:3.5118 train_time:842561ms step_avg:402.37ms step:2105/3242 train_loss:3.4313 train_time:842955ms step_avg:402.36ms step:2106/3242 train_loss:3.1833 train_time:843348ms step_avg:402.36ms step:2107/3242 train_loss:3.3678 train_time:843742ms step_avg:402.36ms step:2108/3242 train_loss:3.6712 train_time:844137ms step_avg:402.35ms step:2109/3242 train_loss:3.2139 train_time:844532ms step_avg:402.35ms step:2110/3242 train_loss:3.3467 train_time:844926ms step_avg:402.35ms step:2111/3242 train_loss:3.3568 train_time:845319ms step_avg:402.34ms step:2112/3242 train_loss:3.3447 train_time:845714ms step_avg:402.34ms step:2113/3242 train_loss:3.4637 train_time:846109ms step_avg:402.33ms step:2114/3242 train_loss:3.6781 train_time:846504ms step_avg:402.33ms step:2115/3242 train_loss:3.3433 train_time:846900ms step_avg:402.33ms step:2116/3242 train_loss:3.3570 train_time:847295ms step_avg:402.32ms step:2117/3242 train_loss:3.2694 train_time:847690ms step_avg:402.32ms step:2118/3242 train_loss:3.3407 train_time:848084ms step_avg:402.32ms step:2119/3242 train_loss:3.4706 train_time:848478ms step_avg:402.31ms step:2120/3242 train_loss:3.4607 train_time:848873ms step_avg:402.31ms step:2121/3242 train_loss:3.5222 train_time:849267ms step_avg:402.31ms step:2122/3242 train_loss:3.5025 train_time:849822ms step_avg:402.38ms step:2123/3242 train_loss:3.4653 train_time:850223ms step_avg:402.38ms step:2124/3242 train_loss:3.4656 train_time:850618ms step_avg:402.37ms step:2125/3242 train_loss:3.3828 train_time:851012ms step_avg:402.37ms step:2125/3242 val_loss:3.4122 train_time:851038ms step_avg:402.38ms 
step:2126/3242 train_loss:3.2778 train_time:851409ms step_avg:402.37ms step:2127/3242 train_loss:3.5457 train_time:851805ms step_avg:402.36ms step:2128/3242 train_loss:3.4240 train_time:852201ms step_avg:402.36ms step:2129/3242 train_loss:3.4282 train_time:852596ms step_avg:402.36ms step:2130/3242 train_loss:3.3820 train_time:852992ms step_avg:402.35ms step:2131/3242 train_loss:3.5089 train_time:853387ms step_avg:402.35ms step:2132/3242 train_loss:3.3985 train_time:853782ms step_avg:402.35ms step:2133/3242 train_loss:3.2500 train_time:854176ms step_avg:402.34ms step:2134/3242 train_loss:3.3286 train_time:854570ms step_avg:402.34ms step:2135/3242 train_loss:3.4103 train_time:854965ms step_avg:402.34ms step:2136/3242 train_loss:3.2411 train_time:855360ms step_avg:402.33ms step:2137/3242 train_loss:3.3336 train_time:855754ms step_avg:402.33ms step:2138/3242 train_loss:3.3900 train_time:856148ms step_avg:402.33ms step:2139/3242 train_loss:3.4443 train_time:856543ms step_avg:402.32ms step:2140/3242 train_loss:3.4756 train_time:856937ms step_avg:402.32ms step:2141/3242 train_loss:3.5435 train_time:857332ms step_avg:402.31ms step:2142/3242 train_loss:3.4555 train_time:857728ms step_avg:402.31ms step:2143/3242 train_loss:3.5800 train_time:858124ms step_avg:402.31ms step:2144/3242 train_loss:3.5964 train_time:858518ms step_avg:402.30ms step:2145/3242 train_loss:3.3209 train_time:858912ms step_avg:402.30ms step:2146/3242 train_loss:3.9071 train_time:859307ms step_avg:402.30ms step:2147/3242 train_loss:3.3678 train_time:859702ms step_avg:402.29ms step:2148/3242 train_loss:3.4594 train_time:860097ms step_avg:402.29ms step:2149/3242 train_loss:3.3813 train_time:860493ms step_avg:402.29ms step:2150/3242 train_loss:3.5488 train_time:860889ms step_avg:402.28ms step:2151/3242 train_loss:3.1407 train_time:861284ms step_avg:402.28ms step:2152/3242 train_loss:3.3235 train_time:861679ms step_avg:402.28ms step:2153/3242 train_loss:3.7901 train_time:862073ms step_avg:402.27ms 
step:2154/3242 train_loss:3.5064 train_time:862469ms step_avg:402.27ms step:2155/3242 train_loss:3.3763 train_time:862864ms step_avg:402.27ms step:2156/3242 train_loss:3.2946 train_time:863259ms step_avg:402.26ms step:2157/3242 train_loss:3.5878 train_time:863655ms step_avg:402.26ms step:2158/3242 train_loss:3.5845 train_time:864051ms step_avg:402.26ms step:2159/3242 train_loss:3.5967 train_time:864445ms step_avg:402.25ms step:2160/3242 train_loss:3.3300 train_time:864840ms step_avg:402.25ms step:2161/3242 train_loss:3.4870 train_time:865237ms step_avg:402.25ms step:2162/3242 train_loss:3.3789 train_time:865633ms step_avg:402.25ms step:2163/3242 train_loss:3.3351 train_time:866029ms step_avg:402.24ms step:2164/3242 train_loss:3.1532 train_time:866426ms step_avg:402.24ms step:2165/3242 train_loss:3.4315 train_time:866822ms step_avg:402.24ms step:2166/3242 train_loss:3.4580 train_time:867218ms step_avg:402.23ms step:2167/3242 train_loss:3.4601 train_time:867613ms step_avg:402.23ms step:2168/3242 train_loss:3.5181 train_time:868009ms step_avg:402.23ms step:2169/3242 train_loss:3.4365 train_time:868404ms step_avg:402.23ms step:2170/3242 train_loss:3.3220 train_time:868799ms step_avg:402.22ms step:2171/3242 train_loss:3.2804 train_time:869194ms step_avg:402.22ms step:2172/3242 train_loss:3.4880 train_time:869589ms step_avg:402.21ms step:2173/3242 train_loss:3.4040 train_time:869984ms step_avg:402.21ms step:2174/3242 train_loss:3.0971 train_time:870379ms step_avg:402.21ms step:2175/3242 train_loss:3.6443 train_time:870774ms step_avg:402.21ms step:2176/3242 train_loss:3.3673 train_time:871169ms step_avg:402.20ms step:2177/3242 train_loss:3.3936 train_time:871565ms step_avg:402.20ms step:2178/3242 train_loss:3.4412 train_time:871960ms step_avg:402.20ms step:2179/3242 train_loss:3.4260 train_time:872355ms step_avg:402.19ms step:2180/3242 train_loss:3.4892 train_time:872749ms step_avg:402.19ms step:2181/3242 train_loss:3.4544 train_time:873146ms step_avg:402.19ms 
step:2182/3242 train_loss:3.3579 train_time:873540ms step_avg:402.18ms step:2183/3242 train_loss:3.6425 train_time:873935ms step_avg:402.18ms step:2184/3242 train_loss:3.3747 train_time:874330ms step_avg:402.18ms step:2185/3242 train_loss:3.3210 train_time:874726ms step_avg:402.17ms step:2186/3242 train_loss:4.1844 train_time:875122ms step_avg:402.17ms step:2187/3242 train_loss:3.6407 train_time:875518ms step_avg:402.17ms step:2188/3242 train_loss:3.4304 train_time:875915ms step_avg:402.16ms step:2189/3242 train_loss:3.2737 train_time:876311ms step_avg:402.16ms step:2190/3242 train_loss:3.3914 train_time:876707ms step_avg:402.16ms step:2191/3242 train_loss:3.4604 train_time:877103ms step_avg:402.16ms step:2192/3242 train_loss:3.3401 train_time:877500ms step_avg:402.15ms step:2193/3242 train_loss:3.3233 train_time:877897ms step_avg:402.15ms step:2194/3242 train_loss:3.4426 train_time:878293ms step_avg:402.15ms step:2195/3242 train_loss:3.2208 train_time:878689ms step_avg:402.15ms step:2196/3242 train_loss:3.6234 train_time:879084ms step_avg:402.14ms step:2197/3242 train_loss:3.4769 train_time:879481ms step_avg:402.14ms step:2198/3242 train_loss:3.5671 train_time:879876ms step_avg:402.14ms step:2199/3242 train_loss:3.5605 train_time:880272ms step_avg:402.13ms step:2200/3242 train_loss:3.6225 train_time:880669ms step_avg:402.13ms step:2201/3242 train_loss:3.5152 train_time:881065ms step_avg:402.13ms step:2202/3242 train_loss:3.3881 train_time:881460ms step_avg:402.13ms step:2203/3242 train_loss:3.5137 train_time:881854ms step_avg:402.12ms step:2204/3242 train_loss:3.4830 train_time:882249ms step_avg:402.12ms step:2205/3242 train_loss:3.5058 train_time:882643ms step_avg:402.12ms step:2206/3242 train_loss:3.4923 train_time:883038ms step_avg:402.11ms step:2207/3242 train_loss:3.2438 train_time:883432ms step_avg:402.11ms step:2208/3242 train_loss:3.7531 train_time:883827ms step_avg:402.11ms step:2209/3242 train_loss:3.3350 train_time:884221ms step_avg:402.10ms 
step:2210/3242 train_loss:3.3628 train_time:884616ms step_avg:402.10ms step:2211/3242 train_loss:3.6365 train_time:885010ms step_avg:402.09ms step:2212/3242 train_loss:3.2953 train_time:885405ms step_avg:402.09ms step:2213/3242 train_loss:3.4828 train_time:885801ms step_avg:402.09ms step:2214/3242 train_loss:3.4061 train_time:886196ms step_avg:402.09ms step:2215/3242 train_loss:3.4637 train_time:886590ms step_avg:402.08ms step:2216/3242 train_loss:3.3302 train_time:886984ms step_avg:402.08ms step:2217/3242 train_loss:3.3276 train_time:887379ms step_avg:402.07ms step:2218/3242 train_loss:3.7223 train_time:887774ms step_avg:402.07ms step:2219/3242 train_loss:3.5332 train_time:888169ms step_avg:402.07ms step:2220/3242 train_loss:3.4024 train_time:888566ms step_avg:402.07ms step:2221/3242 train_loss:3.6688 train_time:888961ms step_avg:402.06ms step:2222/3242 train_loss:3.7129 train_time:889356ms step_avg:402.06ms step:2223/3242 train_loss:3.4682 train_time:889751ms step_avg:402.06ms step:2224/3242 train_loss:3.4440 train_time:890146ms step_avg:402.05ms step:2225/3242 train_loss:3.4146 train_time:890541ms step_avg:402.05ms step:2226/3242 train_loss:3.4435 train_time:890937ms step_avg:402.05ms step:2227/3242 train_loss:3.5154 train_time:891331ms step_avg:402.04ms step:2228/3242 train_loss:3.4333 train_time:891726ms step_avg:402.04ms step:2229/3242 train_loss:3.5635 train_time:892120ms step_avg:402.04ms step:2230/3242 train_loss:3.3706 train_time:892515ms step_avg:402.03ms step:2231/3242 train_loss:3.3929 train_time:892910ms step_avg:402.03ms step:2232/3242 train_loss:3.3655 train_time:893306ms step_avg:402.03ms step:2233/3242 train_loss:3.7291 train_time:893702ms step_avg:402.03ms step:2234/3242 train_loss:3.2613 train_time:894098ms step_avg:402.02ms step:2235/3242 train_loss:3.5124 train_time:894492ms step_avg:402.02ms step:2236/3242 train_loss:3.2279 train_time:894888ms step_avg:402.02ms step:2237/3242 train_loss:3.2192 train_time:895284ms step_avg:402.01ms 
step:2238/3242 train_loss:3.3325 train_time:895679ms step_avg:402.01ms step:2239/3242 train_loss:3.4996 train_time:896074ms step_avg:402.01ms step:2240/3242 train_loss:3.3627 train_time:896469ms step_avg:402.00ms step:2241/3242 train_loss:3.4853 train_time:896863ms step_avg:402.00ms step:2242/3242 train_loss:3.4581 train_time:897257ms step_avg:402.00ms step:2243/3242 train_loss:3.4520 train_time:897651ms step_avg:401.99ms step:2244/3242 train_loss:3.5846 train_time:898045ms step_avg:401.99ms step:2245/3242 train_loss:3.4219 train_time:898439ms step_avg:401.99ms step:2246/3242 train_loss:3.2785 train_time:898833ms step_avg:401.98ms step:2247/3242 train_loss:3.3407 train_time:899228ms step_avg:401.98ms step:2248/3242 train_loss:3.4173 train_time:899623ms step_avg:401.98ms step:2249/3242 train_loss:3.4002 train_time:900018ms step_avg:401.97ms step:2250/3242 train_loss:3.2912 train_time:900414ms step_avg:401.97ms step:2250/3242 val_loss:3.4008 train_time:900440ms step_avg:401.98ms step:2251/3242 train_loss:3.2338 train_time:900810ms step_avg:401.97ms step:2252/3242 train_loss:3.2995 train_time:901204ms step_avg:401.96ms step:2253/3242 train_loss:3.4840 train_time:901598ms step_avg:401.96ms step:2254/3242 train_loss:3.3559 train_time:901993ms step_avg:401.96ms step:2255/3242 train_loss:3.3633 train_time:902388ms step_avg:401.95ms step:2256/3242 train_loss:3.3618 train_time:902782ms step_avg:401.95ms step:2257/3242 train_loss:3.3848 train_time:903178ms step_avg:401.95ms step:2258/3242 train_loss:3.6189 train_time:903573ms step_avg:401.95ms step:2259/3242 train_loss:3.1908 train_time:903967ms step_avg:401.94ms step:2260/3242 train_loss:3.1590 train_time:904363ms step_avg:401.94ms step:2261/3242 train_loss:3.8072 train_time:904758ms step_avg:401.94ms step:2262/3242 train_loss:3.2636 train_time:905153ms step_avg:401.93ms step:2263/3242 train_loss:5.2380 train_time:905547ms step_avg:401.93ms step:2264/3242 train_loss:3.3789 train_time:905942ms step_avg:401.93ms 
step:2265/3242 train_loss:3.6519 train_time:906336ms step_avg:401.92ms step:2266/3242 train_loss:3.3630 train_time:906731ms step_avg:401.92ms step:2267/3242 train_loss:3.4185 train_time:907127ms step_avg:401.92ms step:2268/3242 train_loss:3.3561 train_time:907521ms step_avg:401.91ms step:2269/3242 train_loss:3.4249 train_time:907916ms step_avg:401.91ms step:2270/3242 train_loss:3.5207 train_time:908311ms step_avg:401.91ms step:2271/3242 train_loss:3.2638 train_time:908706ms step_avg:401.90ms step:2272/3242 train_loss:3.3056 train_time:909101ms step_avg:401.90ms step:2273/3242 train_loss:3.5292 train_time:909496ms step_avg:401.90ms step:2274/3242 train_loss:3.3218 train_time:909891ms step_avg:401.90ms step:2275/3242 train_loss:3.5332 train_time:910287ms step_avg:401.89ms step:2276/3242 train_loss:3.2710 train_time:910683ms step_avg:401.89ms step:2277/3242 train_loss:3.5698 train_time:911079ms step_avg:401.89ms step:2278/3242 train_loss:3.5018 train_time:911475ms step_avg:401.88ms step:2279/3242 train_loss:3.2959 train_time:911871ms step_avg:401.88ms step:2280/3242 train_loss:3.3990 train_time:912457ms step_avg:401.96ms step:2281/3242 train_loss:3.7525 train_time:912861ms step_avg:401.96ms step:2282/3242 train_loss:3.3502 train_time:913257ms step_avg:401.96ms step:2283/3242 train_loss:3.2270 train_time:913651ms step_avg:401.96ms step:2284/3242 train_loss:3.4061 train_time:914047ms step_avg:401.96ms step:2285/3242 train_loss:3.3296 train_time:914443ms step_avg:401.95ms step:2286/3242 train_loss:3.1901 train_time:915035ms step_avg:402.04ms step:2287/3242 train_loss:3.2564 train_time:915430ms step_avg:402.03ms step:2288/3242 train_loss:3.3799 train_time:915826ms step_avg:402.03ms step:2289/3242 train_loss:3.3056 train_time:916221ms step_avg:402.03ms step:2290/3242 train_loss:3.4397 train_time:916616ms step_avg:402.02ms step:2291/3242 train_loss:3.4234 train_time:917012ms step_avg:402.02ms step:2292/3242 train_loss:3.2457 train_time:917408ms step_avg:402.02ms 
step:2293/3242 train_loss:3.4300 train_time:917803ms step_avg:402.02ms step:2294/3242 train_loss:3.4275 train_time:918199ms step_avg:402.01ms step:2295/3242 train_loss:3.6286 train_time:918595ms step_avg:402.01ms step:2296/3242 train_loss:3.2401 train_time:918992ms step_avg:402.01ms step:2297/3242 train_loss:4.0451 train_time:919389ms step_avg:402.01ms step:2298/3242 train_loss:3.3435 train_time:919784ms step_avg:402.00ms step:2299/3242 train_loss:3.2775 train_time:920178ms step_avg:402.00ms step:2300/3242 train_loss:3.4426 train_time:920573ms step_avg:402.00ms step:2301/3242 train_loss:3.3597 train_time:920967ms step_avg:401.99ms step:2302/3242 train_loss:3.4322 train_time:921361ms step_avg:401.99ms step:2303/3242 train_loss:3.8414 train_time:921755ms step_avg:401.99ms step:2304/3242 train_loss:3.4394 train_time:922149ms step_avg:401.98ms step:2305/3242 train_loss:3.2125 train_time:922543ms step_avg:401.98ms step:2306/3242 train_loss:3.6260 train_time:922938ms step_avg:401.98ms step:2307/3242 train_loss:3.2543 train_time:923333ms step_avg:401.97ms step:2308/3242 train_loss:3.4023 train_time:923728ms step_avg:401.97ms step:2309/3242 train_loss:3.3558 train_time:924123ms step_avg:401.97ms step:2310/3242 train_loss:3.3098 train_time:924518ms step_avg:401.96ms step:2311/3242 train_loss:3.6218 train_time:924914ms step_avg:401.96ms step:2312/3242 train_loss:3.2344 train_time:925311ms step_avg:401.96ms step:2313/3242 train_loss:3.4460 train_time:925707ms step_avg:401.96ms step:2314/3242 train_loss:3.4131 train_time:926101ms step_avg:401.95ms step:2315/3242 train_loss:3.5367 train_time:926496ms step_avg:401.95ms step:2316/3242 train_loss:3.2375 train_time:926889ms step_avg:401.95ms step:2317/3242 train_loss:3.4270 train_time:927283ms step_avg:401.94ms step:2318/3242 train_loss:3.4813 train_time:927677ms step_avg:401.94ms step:2319/3242 train_loss:3.4369 train_time:928073ms step_avg:401.94ms step:2320/3242 train_loss:3.3045 train_time:928468ms step_avg:401.93ms 
step:2321/3242 train_loss:3.3990 train_time:928862ms step_avg:401.93ms step:2322/3242 train_loss:3.2284 train_time:929259ms step_avg:401.93ms step:2323/3242 train_loss:3.4208 train_time:929654ms step_avg:401.93ms step:2324/3242 train_loss:3.3577 train_time:930049ms step_avg:401.92ms step:2325/3242 train_loss:3.0659 train_time:930445ms step_avg:401.92ms step:2326/3242 train_loss:3.3328 train_time:930840ms step_avg:401.92ms step:2327/3242 train_loss:3.4656 train_time:931235ms step_avg:401.91ms step:2328/3242 train_loss:3.3829 train_time:931630ms step_avg:401.91ms step:2329/3242 train_loss:3.4049 train_time:932026ms step_avg:401.91ms step:2330/3242 train_loss:3.4242 train_time:932420ms step_avg:401.91ms step:2331/3242 train_loss:3.5120 train_time:932816ms step_avg:401.90ms step:2332/3242 train_loss:3.5102 train_time:933210ms step_avg:401.90ms step:2333/3242 train_loss:3.2331 train_time:933605ms step_avg:401.90ms step:2334/3242 train_loss:3.3238 train_time:934000ms step_avg:401.89ms step:2335/3242 train_loss:3.3152 train_time:934395ms step_avg:401.89ms step:2336/3242 train_loss:3.2831 train_time:934789ms step_avg:401.89ms step:2337/3242 train_loss:3.0857 train_time:935183ms step_avg:401.88ms step:2338/3242 train_loss:3.3755 train_time:935578ms step_avg:401.88ms step:2339/3242 train_loss:3.3684 train_time:935972ms step_avg:401.88ms step:2340/3242 train_loss:3.4531 train_time:936366ms step_avg:401.87ms step:2341/3242 train_loss:3.2548 train_time:936760ms step_avg:401.87ms step:2342/3242 train_loss:3.4128 train_time:937154ms step_avg:401.87ms step:2343/3242 train_loss:3.3663 train_time:937548ms step_avg:401.86ms step:2344/3242 train_loss:3.2830 train_time:937941ms step_avg:401.86ms step:2345/3242 train_loss:3.4936 train_time:938335ms step_avg:401.86ms step:2346/3242 train_loss:3.4002 train_time:938730ms step_avg:401.85ms step:2347/3242 train_loss:3.3552 train_time:939126ms step_avg:401.85ms step:2348/3242 train_loss:3.3629 train_time:939521ms step_avg:401.85ms 
step:2349/3242 train_loss:3.4565 train_time:939915ms step_avg:401.84ms step:2350/3242 train_loss:3.3590 train_time:940309ms step_avg:401.84ms step:2351/3242 train_loss:3.4306 train_time:940703ms step_avg:401.84ms step:2352/3242 train_loss:3.4379 train_time:941097ms step_avg:401.83ms step:2353/3242 train_loss:3.3937 train_time:941491ms step_avg:401.83ms step:2354/3242 train_loss:3.4346 train_time:941886ms step_avg:401.83ms step:2355/3242 train_loss:3.2274 train_time:942281ms step_avg:401.83ms step:2356/3242 train_loss:3.2120 train_time:942676ms step_avg:401.82ms step:2357/3242 train_loss:3.3503 train_time:943072ms step_avg:401.82ms step:2358/3242 train_loss:3.3697 train_time:943467ms step_avg:401.82ms step:2359/3242 train_loss:3.4650 train_time:943861ms step_avg:401.81ms step:2360/3242 train_loss:3.3594 train_time:944256ms step_avg:401.81ms step:2361/3242 train_loss:3.2090 train_time:944650ms step_avg:401.81ms step:2362/3242 train_loss:3.5199 train_time:945043ms step_avg:401.80ms step:2363/3242 train_loss:3.1975 train_time:945437ms step_avg:401.80ms step:2364/3242 train_loss:3.2903 train_time:945832ms step_avg:401.80ms step:2365/3242 train_loss:3.2397 train_time:946226ms step_avg:401.79ms step:2366/3242 train_loss:3.2358 train_time:946620ms step_avg:401.79ms step:2367/3242 train_loss:3.3359 train_time:947014ms step_avg:401.79ms step:2368/3242 train_loss:3.2147 train_time:947409ms step_avg:401.78ms step:2369/3242 train_loss:3.3797 train_time:947803ms step_avg:401.78ms step:2370/3242 train_loss:3.3191 train_time:948197ms step_avg:401.78ms step:2371/3242 train_loss:3.3891 train_time:948593ms step_avg:401.78ms step:2372/3242 train_loss:3.4566 train_time:948988ms step_avg:401.77ms step:2373/3242 train_loss:3.3754 train_time:949383ms step_avg:401.77ms step:2374/3242 train_loss:3.3854 train_time:949778ms step_avg:401.77ms step:2375/3242 train_loss:3.4779 train_time:950172ms step_avg:401.76ms step:2375/3242 val_loss:3.3868 train_time:950197ms step_avg:401.77ms 
step:2376/3242 train_loss:3.2856 train_time:950567ms step_avg:401.76ms step:2377/3242 train_loss:3.5418 train_time:950963ms step_avg:401.76ms step:2378/3242 train_loss:3.3500 train_time:951357ms step_avg:401.76ms step:2379/3242 train_loss:4.0542 train_time:951753ms step_avg:401.75ms step:2380/3242 train_loss:3.3325 train_time:952148ms step_avg:401.75ms step:2381/3242 train_loss:3.2627 train_time:952543ms step_avg:401.75ms step:2382/3242 train_loss:3.5893 train_time:952937ms step_avg:401.74ms step:2383/3242 train_loss:3.4877 train_time:953333ms step_avg:401.74ms step:2384/3242 train_loss:3.5084 train_time:953728ms step_avg:401.74ms step:2385/3242 train_loss:3.4171 train_time:954123ms step_avg:401.74ms step:2386/3242 train_loss:3.2868 train_time:954518ms step_avg:401.73ms step:2387/3242 train_loss:3.4415 train_time:954912ms step_avg:401.73ms step:2388/3242 train_loss:3.4758 train_time:955307ms step_avg:401.73ms step:2389/3242 train_loss:3.3209 train_time:955704ms step_avg:401.72ms step:2390/3242 train_loss:3.2770 train_time:956101ms step_avg:401.72ms step:2391/3242 train_loss:3.4515 train_time:956495ms step_avg:401.72ms step:2392/3242 train_loss:3.4459 train_time:956889ms step_avg:401.72ms step:2393/3242 train_loss:3.4916 train_time:957283ms step_avg:401.71ms step:2394/3242 train_loss:3.4105 train_time:957678ms step_avg:401.71ms step:2395/3242 train_loss:3.4903 train_time:958072ms step_avg:401.71ms step:2396/3242 train_loss:3.3096 train_time:958467ms step_avg:401.70ms step:2397/3242 train_loss:3.2777 train_time:958862ms step_avg:401.70ms step:2398/3242 train_loss:3.5302 train_time:959256ms step_avg:401.70ms step:2399/3242 train_loss:3.2837 train_time:959650ms step_avg:401.70ms step:2400/3242 train_loss:3.5071 train_time:960045ms step_avg:401.69ms step:2401/3242 train_loss:3.5439 train_time:960440ms step_avg:401.69ms step:2402/3242 train_loss:3.7046 train_time:960834ms step_avg:401.69ms step:2403/3242 train_loss:3.6616 train_time:961227ms step_avg:401.68ms 
step:2404/3242 train_loss:3.4402 train_time:961621ms step_avg:401.68ms step:2405/3242 train_loss:3.2777 train_time:962015ms step_avg:401.68ms step:2406/3242 train_loss:3.3167 train_time:962410ms step_avg:401.67ms step:2407/3242 train_loss:3.4857 train_time:962804ms step_avg:401.67ms step:2408/3242 train_loss:3.2841 train_time:963199ms step_avg:401.67ms step:2409/3242 train_loss:3.3522 train_time:963595ms step_avg:401.67ms step:2410/3242 train_loss:3.6760 train_time:963991ms step_avg:401.66ms step:2411/3242 train_loss:3.3196 train_time:964386ms step_avg:401.66ms step:2412/3242 train_loss:3.4696 train_time:964782ms step_avg:401.66ms step:2413/3242 train_loss:3.3524 train_time:965176ms step_avg:401.65ms step:2414/3242 train_loss:3.3839 train_time:965570ms step_avg:401.65ms step:2415/3242 train_loss:3.4779 train_time:965966ms step_avg:401.65ms step:2416/3242 train_loss:3.3179 train_time:966362ms step_avg:401.65ms step:2417/3242 train_loss:3.4877 train_time:966756ms step_avg:401.64ms step:2418/3242 train_loss:3.4688 train_time:967152ms step_avg:401.64ms step:2419/3242 train_loss:3.3474 train_time:967547ms step_avg:401.64ms step:2420/3242 train_loss:3.3130 train_time:967943ms step_avg:401.64ms step:2421/3242 train_loss:3.4651 train_time:968338ms step_avg:401.63ms step:2422/3242 train_loss:3.2825 train_time:968732ms step_avg:401.63ms step:2423/3242 train_loss:3.1938 train_time:969127ms step_avg:401.63ms step:2424/3242 train_loss:3.5459 train_time:969522ms step_avg:401.62ms step:2425/3242 train_loss:3.3695 train_time:969918ms step_avg:401.62ms step:2426/3242 train_loss:3.3665 train_time:970312ms step_avg:401.62ms step:2427/3242 train_loss:3.3957 train_time:970707ms step_avg:401.62ms step:2428/3242 train_loss:3.5861 train_time:971103ms step_avg:401.61ms step:2429/3242 train_loss:3.3945 train_time:971498ms step_avg:401.61ms step:2430/3242 train_loss:3.6321 train_time:971892ms step_avg:401.61ms step:2431/3242 train_loss:3.4994 train_time:972287ms step_avg:401.61ms 
step:2432/3242 train_loss:3.3799 train_time:972681ms step_avg:401.60ms step:2433/3242 train_loss:3.3871 train_time:973075ms step_avg:401.60ms step:2434/3242 train_loss:3.3823 train_time:973470ms step_avg:401.60ms step:2435/3242 train_loss:3.3927 train_time:973864ms step_avg:401.59ms step:2436/3242 train_loss:3.3753 train_time:974258ms step_avg:401.59ms step:2437/3242 train_loss:3.3707 train_time:974652ms step_avg:401.59ms step:2438/3242 train_loss:3.4183 train_time:975046ms step_avg:401.58ms step:2439/3242 train_loss:3.2350 train_time:975442ms step_avg:401.58ms step:2440/3242 train_loss:3.2130 train_time:975837ms step_avg:401.58ms step:2441/3242 train_loss:3.4123 train_time:976231ms step_avg:401.58ms step:2442/3242 train_loss:3.1905 train_time:976626ms step_avg:401.57ms step:2443/3242 train_loss:3.3511 train_time:977021ms step_avg:401.57ms step:2444/3242 train_loss:3.4599 train_time:977416ms step_avg:401.57ms step:2445/3242 train_loss:3.4828 train_time:977810ms step_avg:401.56ms step:2446/3242 train_loss:3.3573 train_time:978204ms step_avg:401.56ms step:2447/3242 train_loss:3.3304 train_time:978598ms step_avg:401.56ms step:2448/3242 train_loss:3.4718 train_time:978994ms step_avg:401.56ms step:2449/3242 train_loss:3.2484 train_time:979389ms step_avg:401.55ms step:2450/3242 train_loss:3.3601 train_time:979784ms step_avg:401.55ms step:2451/3242 train_loss:3.3504 train_time:980178ms step_avg:401.55ms step:2452/3242 train_loss:3.3422 train_time:980573ms step_avg:401.54ms step:2453/3242 train_loss:3.3530 train_time:980967ms step_avg:401.54ms step:2454/3242 train_loss:3.6864 train_time:981362ms step_avg:401.54ms step:2455/3242 train_loss:3.4191 train_time:981756ms step_avg:401.54ms step:2456/3242 train_loss:3.3250 train_time:982152ms step_avg:401.53ms step:2457/3242 train_loss:3.2708 train_time:982547ms step_avg:401.53ms step:2458/3242 train_loss:3.6581 train_time:982943ms step_avg:401.53ms step:2459/3242 train_loss:3.3293 train_time:983338ms step_avg:401.53ms 
step:2460/3242 train_loss:3.4964 train_time:983733ms step_avg:401.52ms step:2461/3242 train_loss:3.4486 train_time:984127ms step_avg:401.52ms step:2462/3242 train_loss:3.3736 train_time:984522ms step_avg:401.52ms step:2463/3242 train_loss:3.1809 train_time:984918ms step_avg:401.52ms step:2464/3242 train_loss:3.3637 train_time:985312ms step_avg:401.51ms step:2465/3242 train_loss:3.3445 train_time:985708ms step_avg:401.51ms step:2466/3242 train_loss:3.4389 train_time:986104ms step_avg:401.51ms step:2467/3242 train_loss:3.2504 train_time:986500ms step_avg:401.51ms step:2468/3242 train_loss:3.6018 train_time:986896ms step_avg:401.50ms step:2469/3242 train_loss:3.3790 train_time:987291ms step_avg:401.50ms step:2470/3242 train_loss:3.4870 train_time:987873ms step_avg:401.57ms step:2471/3242 train_loss:3.5578 train_time:988277ms step_avg:401.58ms step:2472/3242 train_loss:3.3761 train_time:988673ms step_avg:401.57ms step:2473/3242 train_loss:3.2958 train_time:989069ms step_avg:401.57ms step:2474/3242 train_loss:3.3768 train_time:989466ms step_avg:401.57ms step:2475/3242 train_loss:3.4034 train_time:989862ms step_avg:401.57ms step:2476/3242 train_loss:3.3704 train_time:990257ms step_avg:401.56ms step:2477/3242 train_loss:3.4735 train_time:990848ms step_avg:401.64ms step:2478/3242 train_loss:3.3567 train_time:991244ms step_avg:401.64ms step:2479/3242 train_loss:3.2360 train_time:991639ms step_avg:401.64ms step:2480/3242 train_loss:3.3225 train_time:992034ms step_avg:401.63ms step:2481/3242 train_loss:3.3329 train_time:992430ms step_avg:401.63ms step:2482/3242 train_loss:3.3655 train_time:992824ms step_avg:401.63ms step:2483/3242 train_loss:3.3914 train_time:993220ms step_avg:401.63ms step:2484/3242 train_loss:3.3759 train_time:993614ms step_avg:401.62ms step:2485/3242 train_loss:3.5606 train_time:994008ms step_avg:401.62ms step:2486/3242 train_loss:3.3906 train_time:994405ms step_avg:401.62ms step:2487/3242 train_loss:3.4796 train_time:994801ms step_avg:401.62ms 
step:2488/3242 train_loss:3.6236 train_time:995195ms step_avg:401.61ms step:2489/3242 train_loss:3.0386 train_time:995590ms step_avg:401.61ms step:2490/3242 train_loss:3.3803 train_time:995985ms step_avg:401.61ms step:2491/3242 train_loss:3.3612 train_time:996380ms step_avg:401.60ms step:2492/3242 train_loss:3.3852 train_time:996775ms step_avg:401.60ms step:2493/3242 train_loss:3.3743 train_time:997169ms step_avg:401.60ms step:2494/3242 train_loss:3.4293 train_time:997564ms step_avg:401.60ms step:2495/3242 train_loss:3.4869 train_time:997960ms step_avg:401.59ms step:2496/3242 train_loss:3.4988 train_time:998355ms step_avg:401.59ms step:2497/3242 train_loss:3.5128 train_time:998750ms step_avg:401.59ms step:2498/3242 train_loss:3.4551 train_time:999145ms step_avg:401.59ms step:2499/3242 train_loss:3.2633 train_time:999540ms step_avg:401.58ms step:2500/3242 train_loss:3.4387 train_time:999933ms step_avg:401.58ms step:2500/3242 val_loss:3.3644 train_time:999959ms step_avg:401.59ms step:2501/3242 train_loss:3.4990 train_time:1000329ms step_avg:401.58ms step:2502/3242 train_loss:3.4554 train_time:1000725ms step_avg:401.57ms step:2503/3242 train_loss:3.1040 train_time:1001119ms step_avg:401.57ms step:2504/3242 train_loss:3.5936 train_time:1001513ms step_avg:401.57ms step:2505/3242 train_loss:3.2135 train_time:1001907ms step_avg:401.57ms step:2506/3242 train_loss:3.5195 train_time:1002300ms step_avg:401.56ms step:2507/3242 train_loss:3.3054 train_time:1002695ms step_avg:401.56ms step:2508/3242 train_loss:3.3489 train_time:1003091ms step_avg:401.56ms step:2509/3242 train_loss:3.5210 train_time:1003485ms step_avg:401.55ms step:2510/3242 train_loss:3.4272 train_time:1003881ms step_avg:401.55ms step:2511/3242 train_loss:3.3559 train_time:1004277ms step_avg:401.55ms step:2512/3242 train_loss:3.3773 train_time:1004672ms step_avg:401.55ms step:2513/3242 train_loss:3.2714 train_time:1005068ms step_avg:401.55ms step:2514/3242 train_loss:3.1711 train_time:1005463ms step_avg:401.54ms 
step:2515/3242 train_loss:3.3816 train_time:1005860ms step_avg:401.54ms step:2516/3242 train_loss:3.6393 train_time:1006255ms step_avg:401.54ms step:2517/3242 train_loss:3.3166 train_time:1006650ms step_avg:401.54ms step:2518/3242 train_loss:3.4706 train_time:1007045ms step_avg:401.53ms step:2519/3242 train_loss:3.3574 train_time:1007440ms step_avg:401.53ms step:2520/3242 train_loss:3.4656 train_time:1007836ms step_avg:401.53ms step:2521/3242 train_loss:3.4074 train_time:1008231ms step_avg:401.53ms step:2522/3242 train_loss:3.3410 train_time:1008626ms step_avg:401.52ms step:2523/3242 train_loss:3.4039 train_time:1009020ms step_avg:401.52ms step:2524/3242 train_loss:3.5476 train_time:1009413ms step_avg:401.52ms step:2525/3242 train_loss:3.3525 train_time:1009807ms step_avg:401.51ms step:2526/3242 train_loss:3.5549 train_time:1010200ms step_avg:401.51ms step:2527/3242 train_loss:3.3299 train_time:1010594ms step_avg:401.51ms step:2528/3242 train_loss:3.2707 train_time:1010988ms step_avg:401.50ms step:2529/3242 train_loss:3.3710 train_time:1011382ms step_avg:401.50ms step:2530/3242 train_loss:3.3719 train_time:1011776ms step_avg:401.50ms step:2531/3242 train_loss:3.4847 train_time:1012170ms step_avg:401.50ms step:2532/3242 train_loss:3.3174 train_time:1012563ms step_avg:401.49ms step:2533/3242 train_loss:3.6839 train_time:1012958ms step_avg:401.49ms step:2534/3242 train_loss:3.3782 train_time:1013353ms step_avg:401.49ms step:2535/3242 train_loss:3.2123 train_time:1013747ms step_avg:401.48ms step:2536/3242 train_loss:3.4498 train_time:1014141ms step_avg:401.48ms step:2537/3242 train_loss:3.5013 train_time:1014535ms step_avg:401.48ms step:2538/3242 train_loss:3.3104 train_time:1014928ms step_avg:401.47ms step:2539/3242 train_loss:3.3433 train_time:1015322ms step_avg:401.47ms step:2540/3242 train_loss:3.4243 train_time:1015716ms step_avg:401.47ms step:2541/3242 train_loss:3.3298 train_time:1016111ms step_avg:401.47ms step:2542/3242 train_loss:3.2321 train_time:1016505ms 
step_avg:401.46ms step:2543/3242 train_loss:3.3775 train_time:1016900ms step_avg:401.46ms step:2544/3242 train_loss:3.1976 train_time:1017295ms step_avg:401.46ms step:2545/3242 train_loss:3.2031 train_time:1017690ms step_avg:401.46ms step:2546/3242 train_loss:3.4228 train_time:1018085ms step_avg:401.45ms step:2547/3242 train_loss:3.5433 train_time:1018479ms step_avg:401.45ms step:2548/3242 train_loss:3.2616 train_time:1018874ms step_avg:401.45ms step:2549/3242 train_loss:3.3944 train_time:1019268ms step_avg:401.44ms step:2550/3242 train_loss:3.3842 train_time:1019662ms step_avg:401.44ms step:2551/3242 train_loss:3.4555 train_time:1020057ms step_avg:401.44ms step:2552/3242 train_loss:3.3684 train_time:1020451ms step_avg:401.44ms step:2553/3242 train_loss:3.3119 train_time:1020846ms step_avg:401.43ms step:2554/3242 train_loss:3.1814 train_time:1021241ms step_avg:401.43ms step:2555/3242 train_loss:3.1112 train_time:1021635ms step_avg:401.43ms step:2556/3242 train_loss:3.3229 train_time:1022029ms step_avg:401.43ms step:2557/3242 train_loss:3.3243 train_time:1022423ms step_avg:401.42ms step:2558/3242 train_loss:3.2113 train_time:1022818ms step_avg:401.42ms step:2559/3242 train_loss:3.3511 train_time:1023212ms step_avg:401.42ms step:2560/3242 train_loss:3.4190 train_time:1023606ms step_avg:401.41ms step:2561/3242 train_loss:3.3744 train_time:1024001ms step_avg:401.41ms step:2562/3242 train_loss:3.1836 train_time:1024396ms step_avg:401.41ms step:2563/3242 train_loss:2.9915 train_time:1024791ms step_avg:401.41ms step:2564/3242 train_loss:3.3451 train_time:1025185ms step_avg:401.40ms step:2565/3242 train_loss:3.3773 train_time:1025579ms step_avg:401.40ms step:2566/3242 train_loss:3.3550 train_time:1025973ms step_avg:401.40ms step:2567/3242 train_loss:3.1281 train_time:1026368ms step_avg:401.40ms step:2568/3242 train_loss:3.1327 train_time:1026762ms step_avg:401.39ms step:2569/3242 train_loss:3.2943 train_time:1027158ms step_avg:401.39ms step:2570/3242 train_loss:3.4784 
train_time:1027552ms step_avg:401.39ms step:2571/3242 train_loss:3.3589 train_time:1027945ms step_avg:401.38ms step:2572/3242 train_loss:3.5020 train_time:1028341ms step_avg:401.38ms step:2573/3242 train_loss:3.2683 train_time:1028735ms step_avg:401.38ms step:2574/3242 train_loss:3.4480 train_time:1029130ms step_avg:401.38ms step:2575/3242 train_loss:3.3114 train_time:1029524ms step_avg:401.37ms step:2576/3242 train_loss:3.3481 train_time:1029919ms step_avg:401.37ms step:2577/3242 train_loss:3.5719 train_time:1030313ms step_avg:401.37ms step:2578/3242 train_loss:3.1284 train_time:1030707ms step_avg:401.37ms step:2579/3242 train_loss:3.2122 train_time:1031102ms step_avg:401.36ms step:2580/3242 train_loss:3.3767 train_time:1031496ms step_avg:401.36ms step:2581/3242 train_loss:3.3848 train_time:1031891ms step_avg:401.36ms step:2582/3242 train_loss:3.1893 train_time:1032285ms step_avg:401.35ms step:2583/3242 train_loss:3.5266 train_time:1032679ms step_avg:401.35ms step:2584/3242 train_loss:3.3419 train_time:1033074ms step_avg:401.35ms step:2585/3242 train_loss:3.3844 train_time:1033468ms step_avg:401.35ms step:2586/3242 train_loss:3.3139 train_time:1033861ms step_avg:401.34ms step:2587/3242 train_loss:3.4547 train_time:1034255ms step_avg:401.34ms step:2588/3242 train_loss:3.3894 train_time:1034648ms step_avg:401.34ms step:2589/3242 train_loss:3.3149 train_time:1035042ms step_avg:401.33ms step:2590/3242 train_loss:3.4494 train_time:1035586ms step_avg:401.39ms step:2591/3242 train_loss:3.3316 train_time:1035988ms step_avg:401.39ms step:2592/3242 train_loss:3.2137 train_time:1036385ms step_avg:401.39ms step:2593/3242 train_loss:3.3479 train_time:1036779ms step_avg:401.39ms step:2594/3242 train_loss:3.3238 train_time:1037173ms step_avg:401.38ms step:2595/3242 train_loss:3.2709 train_time:1037567ms step_avg:401.38ms step:2596/3242 train_loss:3.2026 train_time:1037961ms step_avg:401.38ms step:2597/3242 train_loss:3.3828 train_time:1038356ms step_avg:401.37ms step:2598/3242 
train_loss:3.3118 train_time:1038752ms step_avg:401.37ms step:2599/3242 train_loss:3.1931 train_time:1039147ms step_avg:401.37ms step:2600/3242 train_loss:3.5415 train_time:1039543ms step_avg:401.37ms step:2601/3242 train_loss:3.3649 train_time:1039938ms step_avg:401.37ms step:2602/3242 train_loss:3.2344 train_time:1040334ms step_avg:401.36ms step:2603/3242 train_loss:3.3541 train_time:1040730ms step_avg:401.36ms step:2604/3242 train_loss:3.5263 train_time:1041124ms step_avg:401.36ms step:2605/3242 train_loss:3.3337 train_time:1041518ms step_avg:401.36ms step:2606/3242 train_loss:3.0621 train_time:1041912ms step_avg:401.35ms step:2607/3242 train_loss:3.3813 train_time:1042306ms step_avg:401.35ms step:2608/3242 train_loss:3.3155 train_time:1042700ms step_avg:401.35ms step:2609/3242 train_loss:3.4061 train_time:1043094ms step_avg:401.34ms step:2610/3242 train_loss:3.2974 train_time:1043489ms step_avg:401.34ms step:2611/3242 train_loss:3.5048 train_time:1043884ms step_avg:401.34ms step:2612/3242 train_loss:3.2407 train_time:1044280ms step_avg:401.34ms step:2613/3242 train_loss:3.4828 train_time:1044675ms step_avg:401.34ms step:2614/3242 train_loss:3.1666 train_time:1045069ms step_avg:401.33ms step:2615/3242 train_loss:3.2662 train_time:1045463ms step_avg:401.33ms step:2616/3242 train_loss:3.2764 train_time:1045858ms step_avg:401.33ms step:2617/3242 train_loss:3.3842 train_time:1046253ms step_avg:401.32ms step:2618/3242 train_loss:3.4142 train_time:1046648ms step_avg:401.32ms step:2619/3242 train_loss:3.3309 train_time:1047042ms step_avg:401.32ms step:2620/3242 train_loss:3.1920 train_time:1047437ms step_avg:401.32ms step:2621/3242 train_loss:3.4894 train_time:1047831ms step_avg:401.31ms step:2622/3242 train_loss:3.2994 train_time:1048226ms step_avg:401.31ms step:2623/3242 train_loss:3.3298 train_time:1048620ms step_avg:401.31ms step:2624/3242 train_loss:3.5626 train_time:1049014ms step_avg:401.31ms step:2625/3242 train_loss:3.3276 train_time:1049409ms 
step_avg:401.30ms step:2625/3242 val_loss:3.3421 train_time:1049435ms step_avg:401.31ms step:2626/3242 train_loss:3.4181 train_time:1049804ms step_avg:401.30ms step:2627/3242 train_loss:3.4783 train_time:1050200ms step_avg:401.30ms step:2628/3242 train_loss:3.1809 train_time:1050595ms step_avg:401.30ms step:2629/3242 train_loss:3.1973 train_time:1050988ms step_avg:401.29ms step:2630/3242 train_loss:3.2836 train_time:1051384ms step_avg:401.29ms step:2631/3242 train_loss:3.3119 train_time:1051779ms step_avg:401.29ms step:2632/3242 train_loss:3.5112 train_time:1052174ms step_avg:401.29ms step:2633/3242 train_loss:3.5504 train_time:1052569ms step_avg:401.28ms step:2634/3242 train_loss:3.5087 train_time:1052964ms step_avg:401.28ms step:2635/3242 train_loss:3.3889 train_time:1053359ms step_avg:401.28ms step:2636/3242 train_loss:3.3326 train_time:1053755ms step_avg:401.28ms step:2637/3242 train_loss:3.3277 train_time:1054149ms step_avg:401.28ms step:2638/3242 train_loss:3.5542 train_time:1054545ms step_avg:401.27ms step:2639/3242 train_loss:3.3069 train_time:1054940ms step_avg:401.27ms step:2640/3242 train_loss:3.2853 train_time:1055335ms step_avg:401.27ms step:2641/3242 train_loss:3.3520 train_time:1055730ms step_avg:401.27ms step:2642/3242 train_loss:3.5083 train_time:1056125ms step_avg:401.26ms step:2643/3242 train_loss:3.5191 train_time:1056521ms step_avg:401.26ms step:2644/3242 train_loss:3.2334 train_time:1056916ms step_avg:401.26ms step:2645/3242 train_loss:3.5333 train_time:1057311ms step_avg:401.26ms step:2646/3242 train_loss:3.3193 train_time:1057870ms step_avg:401.32ms step:2647/3242 train_loss:3.4397 train_time:1058272ms step_avg:401.32ms step:2648/3242 train_loss:3.2824 train_time:1058667ms step_avg:401.31ms step:2649/3242 train_loss:3.1996 train_time:1059063ms step_avg:401.31ms step:2650/3242 train_loss:3.3400 train_time:1059458ms step_avg:401.31ms step:2651/3242 train_loss:3.2503 train_time:1059853ms step_avg:401.31ms step:2652/3242 train_loss:3.3228 
train_time:1060248ms step_avg:401.31ms step:2653/3242 train_loss:3.6325 train_time:1060644ms step_avg:401.30ms step:2654/3242 train_loss:3.3961 train_time:1061039ms step_avg:401.30ms step:2655/3242 train_loss:3.2741 train_time:1061435ms step_avg:401.30ms step:2656/3242 train_loss:3.3092 train_time:1061973ms step_avg:401.35ms step:2657/3242 train_loss:3.4409 train_time:1062377ms step_avg:401.35ms step:2658/3242 train_loss:3.2642 train_time:1062773ms step_avg:401.35ms step:2659/3242 train_loss:3.3475 train_time:1063168ms step_avg:401.35ms step:2660/3242 train_loss:3.3616 train_time:1063751ms step_avg:401.42ms step:2661/3242 train_loss:3.4514 train_time:1064154ms step_avg:401.42ms step:2662/3242 train_loss:3.3873 train_time:1064549ms step_avg:401.41ms step:2663/3242 train_loss:3.2626 train_time:1064945ms step_avg:401.41ms step:2664/3242 train_loss:3.4321 train_time:1065342ms step_avg:401.41ms step:2665/3242 train_loss:3.3455 train_time:1065739ms step_avg:401.41ms step:2666/3242 train_loss:3.3577 train_time:1066135ms step_avg:401.41ms step:2667/3242 train_loss:3.4732 train_time:1066727ms step_avg:401.48ms step:2668/3242 train_loss:3.3040 train_time:1067245ms step_avg:401.52ms step:2669/3242 train_loss:3.4484 train_time:1067647ms step_avg:401.52ms step:2670/3242 train_loss:3.3549 train_time:1068043ms step_avg:401.52ms step:2671/3242 train_loss:3.2650 train_time:1068437ms step_avg:401.52ms step:2672/3242 train_loss:3.3310 train_time:1068833ms step_avg:401.51ms step:2673/3242 train_loss:3.2769 train_time:1069392ms step_avg:401.57ms step:2674/3242 train_loss:3.4205 train_time:1069787ms step_avg:401.57ms step:2675/3242 train_loss:3.2481 train_time:1070182ms step_avg:401.57ms step:2676/3242 train_loss:3.4351 train_time:1070577ms step_avg:401.57ms step:2677/3242 train_loss:3.1973 train_time:1070974ms step_avg:401.56ms step:2678/3242 train_loss:3.2446 train_time:1071369ms step_avg:401.56ms step:2679/3242 train_loss:3.2768 train_time:1071765ms step_avg:401.56ms step:2680/3242 
train_loss:3.3843 train_time:1072161ms step_avg:401.56ms step:2681/3242 train_loss:3.3082 train_time:1072555ms step_avg:401.56ms step:2682/3242 train_loss:3.3400 train_time:1072949ms step_avg:401.55ms step:2683/3242 train_loss:3.2734 train_time:1073343ms step_avg:401.55ms step:2684/3242 train_loss:3.3966 train_time:1073738ms step_avg:401.55ms step:2685/3242 train_loss:3.4379 train_time:1074133ms step_avg:401.54ms step:2686/3242 train_loss:3.2252 train_time:1074528ms step_avg:401.54ms step:2687/3242 train_loss:3.1553 train_time:1074922ms step_avg:401.54ms step:2688/3242 train_loss:3.1913 train_time:1075316ms step_avg:401.54ms step:2689/3242 train_loss:3.3054 train_time:1075710ms step_avg:401.53ms step:2690/3242 train_loss:3.4181 train_time:1076104ms step_avg:401.53ms step:2691/3242 train_loss:3.4848 train_time:1076498ms step_avg:401.53ms step:2692/3242 train_loss:3.3499 train_time:1076891ms step_avg:401.53ms step:2693/3242 train_loss:3.3112 train_time:1077285ms step_avg:401.52ms step:2694/3242 train_loss:3.6391 train_time:1077680ms step_avg:401.52ms step:2695/3242 train_loss:3.3572 train_time:1078074ms step_avg:401.52ms step:2696/3242 train_loss:3.3072 train_time:1078468ms step_avg:401.51ms step:2697/3242 train_loss:3.2470 train_time:1078862ms step_avg:401.51ms step:2698/3242 train_loss:3.3329 train_time:1079258ms step_avg:401.51ms step:2699/3242 train_loss:3.1617 train_time:1079652ms step_avg:401.51ms step:2700/3242 train_loss:3.8036 train_time:1080048ms step_avg:401.50ms step:2701/3242 train_loss:3.4001 train_time:1080443ms step_avg:401.50ms step:2702/3242 train_loss:3.2403 train_time:1080840ms step_avg:401.50ms step:2703/3242 train_loss:3.3862 train_time:1081236ms step_avg:401.50ms step:2704/3242 train_loss:3.2842 train_time:1081632ms step_avg:401.50ms step:2705/3242 train_loss:3.4273 train_time:1082028ms step_avg:401.49ms step:2706/3242 train_loss:3.5213 train_time:1082423ms step_avg:401.49ms step:2707/3242 train_loss:3.3242 train_time:1082819ms 
step_avg:401.49ms step:2708/3242 train_loss:3.2469 train_time:1083214ms step_avg:401.49ms step:2709/3242 train_loss:3.2246 train_time:1083610ms step_avg:401.49ms step:2710/3242 train_loss:3.2440 train_time:1084005ms step_avg:401.48ms step:2711/3242 train_loss:3.2586 train_time:1084400ms step_avg:401.48ms step:2712/3242 train_loss:3.5110 train_time:1084796ms step_avg:401.48ms step:2713/3242 train_loss:3.3725 train_time:1085191ms step_avg:401.48ms step:2714/3242 train_loss:3.3668 train_time:1085734ms step_avg:401.53ms step:2715/3242 train_loss:3.4169 train_time:1086128ms step_avg:401.53ms step:2716/3242 train_loss:3.4860 train_time:1086522ms step_avg:401.52ms step:2717/3242 train_loss:3.3495 train_time:1086916ms step_avg:401.52ms step:2718/3242 train_loss:3.3693 train_time:1087310ms step_avg:401.52ms step:2719/3242 train_loss:3.1609 train_time:1087704ms step_avg:401.52ms step:2720/3242 train_loss:3.4580 train_time:1088099ms step_avg:401.51ms step:2721/3242 train_loss:3.1646 train_time:1088493ms step_avg:401.51ms step:2722/3242 train_loss:3.1727 train_time:1088886ms step_avg:401.51ms step:2723/3242 train_loss:3.5635 train_time:1089281ms step_avg:401.50ms step:2724/3242 train_loss:3.2937 train_time:1089675ms step_avg:401.50ms step:2725/3242 train_loss:3.3384 train_time:1090069ms step_avg:401.50ms step:2726/3242 train_loss:3.2004 train_time:1090464ms step_avg:401.50ms step:2727/3242 train_loss:3.4143 train_time:1090859ms step_avg:401.49ms step:2728/3242 train_loss:3.5302 train_time:1091253ms step_avg:401.49ms step:2729/3242 train_loss:3.2246 train_time:1091648ms step_avg:401.49ms step:2730/3242 train_loss:3.3091 train_time:1092043ms step_avg:401.49ms step:2731/3242 train_loss:3.4316 train_time:1092438ms step_avg:401.48ms step:2732/3242 train_loss:3.2887 train_time:1092832ms step_avg:401.48ms step:2733/3242 train_loss:3.3312 train_time:1093226ms step_avg:401.48ms step:2734/3242 train_loss:3.5018 train_time:1093621ms step_avg:401.48ms step:2735/3242 train_loss:3.4298 
train_time:1094015ms step_avg:401.47ms step:2736/3242 train_loss:3.2275 train_time:1094409ms step_avg:401.47ms step:2737/3242 train_loss:3.2975 train_time:1094803ms step_avg:401.47ms step:2738/3242 train_loss:3.2031 train_time:1095197ms step_avg:401.47ms step:2739/3242 train_loss:3.2330 train_time:1095591ms step_avg:401.46ms step:2740/3242 train_loss:3.2299 train_time:1095985ms step_avg:401.46ms step:2741/3242 train_loss:3.2610 train_time:1096380ms step_avg:401.46ms step:2742/3242 train_loss:3.1068 train_time:1096774ms step_avg:401.45ms step:2743/3242 train_loss:3.3630 train_time:1097169ms step_avg:401.45ms step:2744/3242 train_loss:3.2973 train_time:1097564ms step_avg:401.45ms step:2745/3242 train_loss:3.6512 train_time:1098121ms step_avg:401.51ms step:2746/3242 train_loss:3.3497 train_time:1098515ms step_avg:401.50ms step:2747/3242 train_loss:3.3232 train_time:1098909ms step_avg:401.50ms step:2748/3242 train_loss:3.2738 train_time:1099303ms step_avg:401.50ms step:2749/3242 train_loss:3.2972 train_time:1099697ms step_avg:401.50ms step:2750/3242 train_loss:3.2344 train_time:1100091ms step_avg:401.49ms step:2750/3242 val_loss:3.3211 train_time:1100118ms step_avg:401.50ms step:2751/3242 train_loss:3.2643 train_time:1100488ms step_avg:401.49ms step:2752/3242 train_loss:3.1392 train_time:1100883ms step_avg:401.49ms step:2753/3242 train_loss:3.2323 train_time:1101278ms step_avg:401.49ms step:2754/3242 train_loss:3.3946 train_time:1101672ms step_avg:401.48ms step:2755/3242 train_loss:3.3398 train_time:1102066ms step_avg:401.48ms step:2756/3242 train_loss:3.4248 train_time:1102460ms step_avg:401.48ms step:2757/3242 train_loss:3.4460 train_time:1102855ms step_avg:401.48ms step:2758/3242 train_loss:3.4088 train_time:1103250ms step_avg:401.47ms step:2759/3242 train_loss:3.4727 train_time:1103644ms step_avg:401.47ms step:2760/3242 train_loss:3.2458 train_time:1104039ms step_avg:401.47ms step:2761/3242 train_loss:3.3208 train_time:1104433ms step_avg:401.47ms step:2762/3242 
train_loss:3.4199 train_time:1104829ms step_avg:401.46ms step:2763/3242 train_loss:3.3442 train_time:1105224ms step_avg:401.46ms step:2764/3242 train_loss:2.9678 train_time:1105619ms step_avg:401.46ms step:2765/3242 train_loss:3.3131 train_time:1106014ms step_avg:401.46ms step:2766/3242 train_loss:3.3415 train_time:1106410ms step_avg:401.46ms step:2767/3242 train_loss:3.4175 train_time:1106805ms step_avg:401.45ms step:2768/3242 train_loss:3.3475 train_time:1107201ms step_avg:401.45ms step:2769/3242 train_loss:3.2360 train_time:1107597ms step_avg:401.45ms step:2770/3242 train_loss:3.2972 train_time:1107992ms step_avg:401.45ms step:2771/3242 train_loss:3.3844 train_time:1108387ms step_avg:401.44ms step:2772/3242 train_loss:3.1364 train_time:1108782ms step_avg:401.44ms step:2773/3242 train_loss:3.3221 train_time:1109176ms step_avg:401.44ms step:2774/3242 train_loss:3.4294 train_time:1109570ms step_avg:401.44ms step:2775/3242 train_loss:3.1846 train_time:1109964ms step_avg:401.43ms step:2776/3242 train_loss:3.4088 train_time:1110359ms step_avg:401.43ms step:2777/3242 train_loss:3.4326 train_time:1110753ms step_avg:401.43ms step:2778/3242 train_loss:3.2652 train_time:1111147ms step_avg:401.43ms step:2779/3242 train_loss:3.1280 train_time:1111541ms step_avg:401.42ms step:2780/3242 train_loss:3.2755 train_time:1111936ms step_avg:401.42ms step:2781/3242 train_loss:3.5116 train_time:1112332ms step_avg:401.42ms step:2782/3242 train_loss:3.3153 train_time:1112728ms step_avg:401.42ms step:2783/3242 train_loss:3.4609 train_time:1113124ms step_avg:401.42ms step:2784/3242 train_loss:3.3506 train_time:1113521ms step_avg:401.41ms step:2785/3242 train_loss:3.2358 train_time:1113916ms step_avg:401.41ms step:2786/3242 train_loss:3.1725 train_time:1114310ms step_avg:401.41ms step:2787/3242 train_loss:3.2634 train_time:1114705ms step_avg:401.41ms step:2788/3242 train_loss:3.3171 train_time:1115101ms step_avg:401.40ms step:2789/3242 train_loss:3.3386 train_time:1115495ms 
step_avg:401.40ms step:2790/3242 train_loss:3.3257 train_time:1115890ms step_avg:401.40ms step:2791/3242 train_loss:3.3259 train_time:1116285ms step_avg:401.40ms step:2792/3242 train_loss:3.3101 train_time:1116680ms step_avg:401.39ms step:2793/3242 train_loss:3.2167 train_time:1117075ms step_avg:401.39ms step:2794/3242 train_loss:3.4279 train_time:1117470ms step_avg:401.39ms step:2795/3242 train_loss:3.3442 train_time:1117865ms step_avg:401.39ms step:2796/3242 train_loss:3.2954 train_time:1118260ms step_avg:401.39ms step:2797/3242 train_loss:3.3031 train_time:1118654ms step_avg:401.38ms step:2798/3242 train_loss:3.2440 train_time:1119050ms step_avg:401.38ms step:2799/3242 train_loss:3.2516 train_time:1119446ms step_avg:401.38ms step:2800/3242 train_loss:3.4010 train_time:1119842ms step_avg:401.38ms step:2801/3242 train_loss:3.2018 train_time:1120241ms step_avg:401.38ms step:2802/3242 train_loss:3.4253 train_time:1120636ms step_avg:401.37ms step:2803/3242 train_loss:3.2689 train_time:1121030ms step_avg:401.37ms step:2804/3242 train_loss:3.2916 train_time:1121426ms step_avg:401.37ms step:2805/3242 train_loss:3.1523 train_time:1121820ms step_avg:401.37ms step:2806/3242 train_loss:3.5433 train_time:1122215ms step_avg:401.36ms step:2807/3242 train_loss:3.4325 train_time:1122610ms step_avg:401.36ms step:2808/3242 train_loss:3.5388 train_time:1123005ms step_avg:401.36ms step:2809/3242 train_loss:3.2383 train_time:1123401ms step_avg:401.36ms step:2810/3242 train_loss:3.3857 train_time:1123796ms step_avg:401.36ms step:2811/3242 train_loss:3.3717 train_time:1124190ms step_avg:401.35ms step:2812/3242 train_loss:3.1264 train_time:1124585ms step_avg:401.35ms step:2813/3242 train_loss:3.5749 train_time:1124980ms step_avg:401.35ms step:2814/3242 train_loss:3.2870 train_time:1125373ms step_avg:401.35ms step:2815/3242 train_loss:3.2744 train_time:1125768ms step_avg:401.34ms step:2816/3242 train_loss:3.5701 train_time:1126162ms step_avg:401.34ms step:2817/3242 train_loss:3.2785 
train_time:1126557ms step_avg:401.34ms step:2818/3242 train_loss:3.3606 train_time:1126951ms step_avg:401.34ms step:2819/3242 train_loss:3.1850 train_time:1127347ms step_avg:401.33ms step:2820/3242 train_loss:3.4091 train_time:1127742ms step_avg:401.33ms step:2821/3242 train_loss:3.3281 train_time:1128136ms step_avg:401.33ms step:2822/3242 train_loss:3.3264 train_time:1128530ms step_avg:401.33ms step:2823/3242 train_loss:2.7113 train_time:1128925ms step_avg:401.32ms step:2824/3242 train_loss:3.2924 train_time:1129320ms step_avg:401.32ms step:2825/3242 train_loss:3.1988 train_time:1129714ms step_avg:401.32ms step:2826/3242 train_loss:3.4799 train_time:1130109ms step_avg:401.32ms step:2827/3242 train_loss:3.2009 train_time:1130504ms step_avg:401.31ms step:2828/3242 train_loss:3.3481 train_time:1130899ms step_avg:401.31ms step:2829/3242 train_loss:3.4543 train_time:1131293ms step_avg:401.31ms step:2830/3242 train_loss:3.3939 train_time:1131687ms step_avg:401.31ms step:2831/3242 train_loss:3.0692 train_time:1132081ms step_avg:401.30ms step:2832/3242 train_loss:3.3816 train_time:1132476ms step_avg:401.30ms step:2833/3242 train_loss:3.4496 train_time:1132871ms step_avg:401.30ms step:2834/3242 train_loss:3.0413 train_time:1133267ms step_avg:401.30ms step:2835/3242 train_loss:3.3062 train_time:1133663ms step_avg:401.30ms step:2836/3242 train_loss:3.3439 train_time:1134059ms step_avg:401.29ms step:2837/3242 train_loss:3.2602 train_time:1134453ms step_avg:401.29ms step:2838/3242 train_loss:3.3847 train_time:1134847ms step_avg:401.29ms step:2839/3242 train_loss:3.2688 train_time:1135243ms step_avg:401.29ms step:2840/3242 train_loss:3.2579 train_time:1135640ms step_avg:401.29ms step:2841/3242 train_loss:3.0102 train_time:1136035ms step_avg:401.28ms step:2842/3242 train_loss:3.4873 train_time:1136431ms step_avg:401.28ms step:2843/3242 train_loss:3.3576 train_time:1136826ms step_avg:401.28ms step:2844/3242 train_loss:3.2843 train_time:1137221ms step_avg:401.28ms step:2845/3242 
train_loss:3.2255 train_time:1137616ms step_avg:401.28ms step:2846/3242 train_loss:3.2458 train_time:1138011ms step_avg:401.27ms step:2847/3242 train_loss:3.3560 train_time:1138406ms step_avg:401.27ms step:2848/3242 train_loss:3.2321 train_time:1138799ms step_avg:401.27ms step:2849/3242 train_loss:3.3409 train_time:1139193ms step_avg:401.27ms step:2850/3242 train_loss:3.4137 train_time:1139776ms step_avg:401.33ms step:2851/3242 train_loss:3.2783 train_time:1140179ms step_avg:401.33ms step:2852/3242 train_loss:3.2454 train_time:1140574ms step_avg:401.33ms step:2853/3242 train_loss:3.2656 train_time:1140970ms step_avg:401.33ms step:2854/3242 train_loss:3.7314 train_time:1141366ms step_avg:401.32ms step:2855/3242 train_loss:3.2588 train_time:1141761ms step_avg:401.32ms step:2856/3242 train_loss:3.2579 train_time:1142156ms step_avg:401.32ms step:2857/3242 train_loss:3.3841 train_time:1142550ms step_avg:401.32ms step:2858/3242 train_loss:3.2659 train_time:1143141ms step_avg:401.38ms step:2859/3242 train_loss:3.3287 train_time:1143537ms step_avg:401.38ms step:2860/3242 train_loss:3.3487 train_time:1143932ms step_avg:401.38ms step:2861/3242 train_loss:3.3264 train_time:1144330ms step_avg:401.38ms step:2862/3242 train_loss:3.4075 train_time:1144725ms step_avg:401.38ms step:2863/3242 train_loss:3.3713 train_time:1145120ms step_avg:401.37ms step:2864/3242 train_loss:3.3152 train_time:1145514ms step_avg:401.37ms step:2865/3242 train_loss:3.3733 train_time:1145910ms step_avg:401.37ms step:2866/3242 train_loss:3.1865 train_time:1146305ms step_avg:401.37ms step:2867/3242 train_loss:3.2371 train_time:1146700ms step_avg:401.36ms step:2868/3242 train_loss:3.1934 train_time:1147094ms step_avg:401.36ms step:2869/3242 train_loss:3.3764 train_time:1147488ms step_avg:401.36ms step:2870/3242 train_loss:3.2656 train_time:1147882ms step_avg:401.36ms step:2871/3242 train_loss:3.3044 train_time:1148277ms step_avg:401.36ms step:2872/3242 train_loss:3.3890 train_time:1148671ms 
step_avg:401.35ms step:2873/3242 train_loss:3.2914 train_time:1149066ms step_avg:401.35ms step:2874/3242 train_loss:3.2420 train_time:1149461ms step_avg:401.35ms step:2875/3242 train_loss:3.3833 train_time:1149856ms step_avg:401.35ms step:2875/3242 val_loss:3.3003 train_time:1149882ms step_avg:401.35ms step:2876/3242 train_loss:3.0845 train_time:1150253ms step_avg:401.34ms step:2877/3242 train_loss:3.2707 train_time:1150649ms step_avg:401.34ms step:2878/3242 train_loss:3.1903 train_time:1151043ms step_avg:401.34ms step:2879/3242 train_loss:3.3181 train_time:1151439ms step_avg:401.34ms step:2880/3242 train_loss:3.3074 train_time:1151834ms step_avg:401.34ms step:2881/3242 train_loss:3.5179 train_time:1152230ms step_avg:401.33ms step:2882/3242 train_loss:3.3759 train_time:1152627ms step_avg:401.33ms step:2883/3242 train_loss:3.3645 train_time:1153021ms step_avg:401.33ms step:2884/3242 train_loss:3.3398 train_time:1153417ms step_avg:401.33ms step:2885/3242 train_loss:3.1468 train_time:1153813ms step_avg:401.33ms step:2886/3242 train_loss:3.7297 train_time:1154209ms step_avg:401.32ms step:2887/3242 train_loss:3.3999 train_time:1154604ms step_avg:401.32ms step:2888/3242 train_loss:3.3151 train_time:1155000ms step_avg:401.32ms step:2889/3242 train_loss:3.1691 train_time:1155395ms step_avg:401.32ms step:2890/3242 train_loss:3.3121 train_time:1155791ms step_avg:401.32ms step:2891/3242 train_loss:3.3804 train_time:1156186ms step_avg:401.31ms step:2892/3242 train_loss:3.1866 train_time:1156581ms step_avg:401.31ms step:2893/3242 train_loss:3.4132 train_time:1156976ms step_avg:401.31ms step:2894/3242 train_loss:3.3395 train_time:1157369ms step_avg:401.31ms step:2895/3242 train_loss:3.3502 train_time:1157763ms step_avg:401.30ms step:2896/3242 train_loss:3.2741 train_time:1158158ms step_avg:401.30ms step:2897/3242 train_loss:3.3732 train_time:1158554ms step_avg:401.30ms step:2898/3242 train_loss:3.1678 train_time:1158949ms step_avg:401.30ms step:2899/3242 train_loss:3.4420 
train_time:1159345ms step_avg:401.30ms step:2900/3242 train_loss:3.1313 train_time:1159740ms step_avg:401.29ms step:2901/3242 train_loss:3.1874 train_time:1160135ms step_avg:401.29ms step:2902/3242 train_loss:3.1242 train_time:1160529ms step_avg:401.29ms step:2903/3242 train_loss:3.3379 train_time:1160924ms step_avg:401.29ms step:2904/3242 train_loss:3.1450 train_time:1161319ms step_avg:401.28ms step:2905/3242 train_loss:3.1710 train_time:1161713ms step_avg:401.28ms step:2906/3242 train_loss:4.7829 train_time:1162108ms step_avg:401.28ms step:2907/3242 train_loss:3.2969 train_time:1162503ms step_avg:401.28ms step:2908/3242 train_loss:3.0975 train_time:1162897ms step_avg:401.28ms step:2909/3242 train_loss:3.2006 train_time:1163292ms step_avg:401.27ms step:2910/3242 train_loss:3.3489 train_time:1163686ms step_avg:401.27ms step:2911/3242 train_loss:3.2084 train_time:1164080ms step_avg:401.27ms step:2912/3242 train_loss:3.3768 train_time:1164474ms step_avg:401.27ms step:2913/3242 train_loss:3.2599 train_time:1164868ms step_avg:401.26ms step:2914/3242 train_loss:3.2077 train_time:1165261ms step_avg:401.26ms step:2915/3242 train_loss:3.4026 train_time:1165657ms step_avg:401.26ms step:2916/3242 train_loss:3.3139 train_time:1166052ms step_avg:401.26ms step:2917/3242 train_loss:3.2853 train_time:1166447ms step_avg:401.25ms step:2918/3242 train_loss:3.2390 train_time:1166843ms step_avg:401.25ms step:2919/3242 train_loss:3.4073 train_time:1167238ms step_avg:401.25ms step:2920/3242 train_loss:3.3794 train_time:1167634ms step_avg:401.25ms step:2921/3242 train_loss:3.2635 train_time:1168029ms step_avg:401.25ms step:2922/3242 train_loss:3.5087 train_time:1168424ms step_avg:401.24ms step:2923/3242 train_loss:3.4257 train_time:1168819ms step_avg:401.24ms step:2924/3242 train_loss:3.2907 train_time:1169215ms step_avg:401.24ms step:2925/3242 train_loss:3.3779 train_time:1169610ms step_avg:401.24ms step:2926/3242 train_loss:3.2025 train_time:1170005ms step_avg:401.24ms step:2927/3242 
train_loss:3.2763 train_time:1170400ms step_avg:401.23ms step:2928/3242 train_loss:3.2149 train_time:1170796ms step_avg:401.23ms step:2929/3242 train_loss:3.2791 train_time:1171191ms step_avg:401.23ms step:2930/3242 train_loss:3.4775 train_time:1171587ms step_avg:401.23ms step:2931/3242 train_loss:3.2620 train_time:1171983ms step_avg:401.23ms step:2932/3242 train_loss:3.2622 train_time:1172378ms step_avg:401.22ms step:2933/3242 train_loss:3.9001 train_time:1172773ms step_avg:401.22ms step:2934/3242 train_loss:3.3415 train_time:1173168ms step_avg:401.22ms step:2935/3242 train_loss:3.2755 train_time:1173562ms step_avg:401.22ms step:2936/3242 train_loss:3.3107 train_time:1173958ms step_avg:401.22ms step:2937/3242 train_loss:3.1954 train_time:1174354ms step_avg:401.21ms step:2938/3242 train_loss:3.3011 train_time:1174748ms step_avg:401.21ms step:2939/3242 train_loss:3.5773 train_time:1175142ms step_avg:401.21ms step:2940/3242 train_loss:3.2200 train_time:1175535ms step_avg:401.21ms step:2941/3242 train_loss:3.2817 train_time:1175931ms step_avg:401.20ms step:2942/3242 train_loss:3.4236 train_time:1176325ms step_avg:401.20ms step:2943/3242 train_loss:3.4982 train_time:1176720ms step_avg:401.20ms step:2944/3242 train_loss:3.2605 train_time:1177116ms step_avg:401.20ms step:2945/3242 train_loss:3.3198 train_time:1177509ms step_avg:401.20ms step:2946/3242 train_loss:3.3251 train_time:1177903ms step_avg:401.19ms step:2947/3242 train_loss:3.3246 train_time:1178298ms step_avg:401.19ms step:2948/3242 train_loss:3.1675 train_time:1178694ms step_avg:401.19ms step:2949/3242 train_loss:3.5939 train_time:1179089ms step_avg:401.19ms step:2950/3242 train_loss:3.1846 train_time:1179484ms step_avg:401.18ms step:2951/3242 train_loss:3.4956 train_time:1179878ms step_avg:401.18ms step:2952/3242 train_loss:3.1765 train_time:1180271ms step_avg:401.18ms step:2953/3242 train_loss:3.1988 train_time:1180664ms step_avg:401.18ms step:2954/3242 train_loss:3.0232 train_time:1181059ms 
step_avg:401.17ms step:2955/3242 train_loss:3.3002 train_time:1181453ms step_avg:401.17ms step:2956/3242 train_loss:3.2624 train_time:1181847ms step_avg:401.17ms step:2957/3242 train_loss:3.3906 train_time:1182242ms step_avg:401.17ms step:2958/3242 train_loss:3.1172 train_time:1182636ms step_avg:401.17ms step:2959/3242 train_loss:3.2379 train_time:1183031ms step_avg:401.16ms step:2960/3242 train_loss:3.2104 train_time:1183425ms step_avg:401.16ms step:2961/3242 train_loss:3.3110 train_time:1183820ms step_avg:401.16ms step:2962/3242 train_loss:3.3055 train_time:1184214ms step_avg:401.16ms step:2963/3242 train_loss:3.2196 train_time:1184611ms step_avg:401.16ms step:2964/3242 train_loss:3.3383 train_time:1185006ms step_avg:401.15ms step:2965/3242 train_loss:3.2887 train_time:1185399ms step_avg:401.15ms step:2966/3242 train_loss:3.4612 train_time:1185794ms step_avg:401.15ms step:2967/3242 train_loss:3.2504 train_time:1186188ms step_avg:401.15ms step:2968/3242 train_loss:3.1749 train_time:1186581ms step_avg:401.14ms step:2969/3242 train_loss:2.9979 train_time:1186975ms step_avg:401.14ms step:2970/3242 train_loss:3.2806 train_time:1187369ms step_avg:401.14ms step:2971/3242 train_loss:3.4830 train_time:1187761ms step_avg:401.14ms step:2972/3242 train_loss:3.1946 train_time:1188155ms step_avg:401.13ms step:2973/3242 train_loss:3.3728 train_time:1188549ms step_avg:401.13ms step:2974/3242 train_loss:3.2434 train_time:1188943ms step_avg:401.13ms step:2975/3242 train_loss:3.2051 train_time:1189337ms step_avg:401.13ms step:2976/3242 train_loss:3.1636 train_time:1189731ms step_avg:401.12ms step:2977/3242 train_loss:3.2188 train_time:1190125ms step_avg:401.12ms step:2978/3242 train_loss:3.1059 train_time:1190519ms step_avg:401.12ms step:2979/3242 train_loss:3.2455 train_time:1190913ms step_avg:401.12ms step:2980/3242 train_loss:3.1859 train_time:1191307ms step_avg:401.11ms step:2981/3242 train_loss:3.4154 train_time:1191701ms step_avg:401.11ms step:2982/3242 train_loss:3.2037 
train_time:1192095ms step_avg:401.11ms step:2983/3242 train_loss:3.2249 train_time:1192491ms step_avg:401.11ms step:2984/3242 train_loss:3.2954 train_time:1192885ms step_avg:401.10ms step:2985/3242 train_loss:3.2973 train_time:1193279ms step_avg:401.10ms step:2986/3242 train_loss:3.3228 train_time:1193673ms step_avg:401.10ms step:2987/3242 train_loss:3.3754 train_time:1194066ms step_avg:401.10ms step:2988/3242 train_loss:3.2278 train_time:1194460ms step_avg:401.09ms step:2989/3242 train_loss:3.2416 train_time:1194855ms step_avg:401.09ms step:2990/3242 train_loss:3.3843 train_time:1195249ms step_avg:401.09ms step:2991/3242 train_loss:3.5427 train_time:1195642ms step_avg:401.09ms step:2992/3242 train_loss:3.4516 train_time:1196037ms step_avg:401.09ms step:2993/3242 train_loss:3.3855 train_time:1196431ms step_avg:401.08ms step:2994/3242 train_loss:3.2675 train_time:1196825ms step_avg:401.08ms step:2995/3242 train_loss:3.1642 train_time:1197220ms step_avg:401.08ms step:2996/3242 train_loss:3.1500 train_time:1197615ms step_avg:401.08ms step:2997/3242 train_loss:3.3416 train_time:1198009ms step_avg:401.07ms step:2998/3242 train_loss:3.2249 train_time:1198403ms step_avg:401.07ms step:2999/3242 train_loss:3.3783 train_time:1198797ms step_avg:401.07ms step:3000/3242 train_loss:3.1964 train_time:1199192ms step_avg:401.07ms step:3000/3242 val_loss:3.2800 train_time:1199217ms step_avg:401.08ms step:3001/3242 train_loss:3.1593 train_time:1199588ms step_avg:401.07ms step:3002/3242 train_loss:3.2671 train_time:1199982ms step_avg:401.06ms step:3003/3242 train_loss:3.2434 train_time:1200376ms step_avg:401.06ms step:3004/3242 train_loss:3.0918 train_time:1200771ms step_avg:401.06ms step:3005/3242 train_loss:3.1501 train_time:1201165ms step_avg:401.06ms step:3006/3242 train_loss:3.3447 train_time:1201560ms step_avg:401.05ms step:3007/3242 train_loss:3.6196 train_time:1201953ms step_avg:401.05ms step:3008/3242 train_loss:3.2978 train_time:1202347ms step_avg:401.05ms step:3009/3242 
train_loss:3.2219 train_time:1202741ms step_avg:401.05ms step:3010/3242 train_loss:3.2253 train_time:1203136ms step_avg:401.05ms step:3011/3242 train_loss:3.3479 train_time:1203530ms step_avg:401.04ms step:3012/3242 train_loss:3.2469 train_time:1203925ms step_avg:401.04ms step:3013/3242 train_loss:3.1557 train_time:1204320ms step_avg:401.04ms step:3014/3242 train_loss:3.2595 train_time:1204716ms step_avg:401.04ms step:3015/3242 train_loss:3.4147 train_time:1205111ms step_avg:401.04ms step:3016/3242 train_loss:3.1346 train_time:1205506ms step_avg:401.03ms step:3017/3242 train_loss:3.2487 train_time:1205901ms step_avg:401.03ms step:3018/3242 train_loss:3.2629 train_time:1206296ms step_avg:401.03ms step:3019/3242 train_loss:3.2857 train_time:1206690ms step_avg:401.03ms step:3020/3242 train_loss:3.4001 train_time:1207084ms step_avg:401.02ms step:3021/3242 train_loss:3.1774 train_time:1207478ms step_avg:401.02ms step:3022/3242 train_loss:3.3484 train_time:1207872ms step_avg:401.02ms step:3023/3242 train_loss:3.2689 train_time:1208266ms step_avg:401.02ms step:3024/3242 train_loss:3.2402 train_time:1208660ms step_avg:401.02ms step:3025/3242 train_loss:3.2780 train_time:1209053ms step_avg:401.01ms step:3026/3242 train_loss:2.8506 train_time:1209448ms step_avg:401.01ms step:3027/3242 train_loss:4.1419 train_time:1209843ms step_avg:401.01ms step:3028/3242 train_loss:3.3286 train_time:1210238ms step_avg:401.01ms step:3029/3242 train_loss:3.2376 train_time:1210632ms step_avg:401.00ms step:3030/3242 train_loss:3.2776 train_time:1211027ms step_avg:401.00ms step:3031/3242 train_loss:3.2276 train_time:1211422ms step_avg:401.00ms step:3032/3242 train_loss:3.2852 train_time:1211818ms step_avg:401.00ms step:3033/3242 train_loss:3.2872 train_time:1212213ms step_avg:401.00ms step:3034/3242 train_loss:3.3207 train_time:1212609ms step_avg:400.99ms step:3035/3242 train_loss:3.1633 train_time:1213004ms step_avg:400.99ms step:3036/3242 train_loss:3.2606 train_time:1213398ms 
step_avg:400.99ms step:3037/3242 train_loss:3.3869 train_time:1213793ms step_avg:400.99ms step:3038/3242 train_loss:3.2225 train_time:1214187ms step_avg:400.99ms step:3039/3242 train_loss:3.1755 train_time:1214582ms step_avg:400.98ms step:3040/3242 train_loss:3.3550 train_time:1215159ms step_avg:401.04ms step:3041/3242 train_loss:3.4103 train_time:1215561ms step_avg:401.04ms step:3042/3242 train_loss:3.1901 train_time:1215956ms step_avg:401.04ms step:3043/3242 train_loss:3.1260 train_time:1216350ms step_avg:401.04ms step:3044/3242 train_loss:3.2107 train_time:1216745ms step_avg:401.04ms step:3045/3242 train_loss:3.1161 train_time:1217140ms step_avg:401.03ms step:3046/3242 train_loss:3.2762 train_time:1217535ms step_avg:401.03ms step:3047/3242 train_loss:3.1828 train_time:1217930ms step_avg:401.03ms step:3048/3242 train_loss:3.7310 train_time:1218516ms step_avg:401.09ms step:3049/3242 train_loss:2.9030 train_time:1218911ms step_avg:401.09ms step:3050/3242 train_loss:3.2844 train_time:1219307ms step_avg:401.09ms step:3051/3242 train_loss:3.2580 train_time:1219702ms step_avg:401.09ms step:3052/3242 train_loss:3.2993 train_time:1220096ms step_avg:401.08ms step:3053/3242 train_loss:3.4429 train_time:1220489ms step_avg:401.08ms step:3054/3242 train_loss:3.2169 train_time:1220884ms step_avg:401.08ms step:3055/3242 train_loss:3.3013 train_time:1221278ms step_avg:401.08ms step:3056/3242 train_loss:3.3249 train_time:1221671ms step_avg:401.07ms step:3057/3242 train_loss:3.2663 train_time:1222065ms step_avg:401.07ms step:3058/3242 train_loss:3.3041 train_time:1222459ms step_avg:401.07ms step:3059/3242 train_loss:3.1806 train_time:1222853ms step_avg:401.07ms step:3060/3242 train_loss:3.2057 train_time:1223247ms step_avg:401.06ms step:3061/3242 train_loss:3.4590 train_time:1223642ms step_avg:401.06ms step:3062/3242 train_loss:3.3818 train_time:1224036ms step_avg:401.06ms step:3063/3242 train_loss:3.3701 train_time:1224431ms step_avg:401.06ms step:3064/3242 train_loss:3.1990 
train_time:1224825ms step_avg:401.06ms step:3065/3242 train_loss:3.1892 train_time:1225220ms step_avg:401.05ms step:3066/3242 train_loss:3.4049 train_time:1225615ms step_avg:401.05ms step:3067/3242 train_loss:3.2085 train_time:1226008ms step_avg:401.05ms step:3068/3242 train_loss:3.3304 train_time:1226407ms step_avg:401.05ms step:3069/3242 train_loss:3.3519 train_time:1226802ms step_avg:401.05ms step:3070/3242 train_loss:3.2531 train_time:1227198ms step_avg:401.04ms step:3071/3242 train_loss:3.2740 train_time:1227591ms step_avg:401.04ms step:3072/3242 train_loss:3.3191 train_time:1227986ms step_avg:401.04ms step:3073/3242 train_loss:3.2307 train_time:1228383ms step_avg:401.04ms step:3074/3242 train_loss:3.1129 train_time:1228778ms step_avg:401.04ms step:3075/3242 train_loss:3.2464 train_time:1229172ms step_avg:401.03ms step:3076/3242 train_loss:3.1975 train_time:1229566ms step_avg:401.03ms step:3077/3242 train_loss:3.1464 train_time:1229960ms step_avg:401.03ms step:3078/3242 train_loss:3.3039 train_time:1230355ms step_avg:401.03ms step:3079/3242 train_loss:3.3178 train_time:1230750ms step_avg:401.03ms step:3080/3242 train_loss:3.1974 train_time:1231145ms step_avg:401.02ms step:3081/3242 train_loss:3.2348 train_time:1231540ms step_avg:401.02ms step:3082/3242 train_loss:3.2161 train_time:1231935ms step_avg:401.02ms step:3083/3242 train_loss:3.2179 train_time:1232330ms step_avg:401.02ms step:3084/3242 train_loss:3.4242 train_time:1232724ms step_avg:401.02ms step:3085/3242 train_loss:3.0988 train_time:1233119ms step_avg:401.01ms step:3086/3242 train_loss:4.0542 train_time:1233514ms step_avg:401.01ms step:3087/3242 train_loss:3.1918 train_time:1233909ms step_avg:401.01ms step:3088/3242 train_loss:3.3029 train_time:1234305ms step_avg:401.01ms step:3089/3242 train_loss:3.4346 train_time:1234699ms step_avg:401.01ms step:3090/3242 train_loss:3.1431 train_time:1235094ms step_avg:401.00ms step:3091/3242 train_loss:3.2177 train_time:1235490ms step_avg:401.00ms step:3092/3242 
train_loss:3.2215 train_time:1235886ms step_avg:401.00ms step:3093/3242 train_loss:3.2751 train_time:1236281ms step_avg:401.00ms step:3094/3242 train_loss:3.2723 train_time:1236676ms step_avg:401.00ms step:3095/3242 train_loss:3.1003 train_time:1237071ms step_avg:401.00ms step:3096/3242 train_loss:3.3212 train_time:1237465ms step_avg:400.99ms step:3097/3242 train_loss:3.2107 train_time:1237859ms step_avg:400.99ms step:3098/3242 train_loss:3.4968 train_time:1238253ms step_avg:400.99ms step:3099/3242 train_loss:3.2819 train_time:1238647ms step_avg:400.99ms step:3100/3242 train_loss:3.2133 train_time:1239042ms step_avg:400.98ms step:3101/3242 train_loss:3.2679 train_time:1239436ms step_avg:400.98ms step:3102/3242 train_loss:3.1982 train_time:1239830ms step_avg:400.98ms step:3103/3242 train_loss:3.2104 train_time:1240225ms step_avg:400.98ms step:3104/3242 train_loss:3.6816 train_time:1240620ms step_avg:400.98ms step:3105/3242 train_loss:3.2337 train_time:1241015ms step_avg:400.97ms step:3106/3242 train_loss:3.2932 train_time:1241409ms step_avg:400.97ms step:3107/3242 train_loss:3.3413 train_time:1241803ms step_avg:400.97ms step:3108/3242 train_loss:3.2835 train_time:1242198ms step_avg:400.97ms step:3109/3242 train_loss:3.3591 train_time:1242592ms step_avg:400.97ms step:3110/3242 train_loss:3.2200 train_time:1242986ms step_avg:400.96ms step:3111/3242 train_loss:3.7475 train_time:1243381ms step_avg:400.96ms step:3112/3242 train_loss:3.4171 train_time:1243775ms step_avg:400.96ms step:3113/3242 train_loss:2.9088 train_time:1244168ms step_avg:400.96ms step:3114/3242 train_loss:3.1724 train_time:1244562ms step_avg:400.95ms step:3115/3242 train_loss:3.0180 train_time:1244956ms step_avg:400.95ms step:3116/3242 train_loss:3.4513 train_time:1245350ms step_avg:400.95ms step:3117/3242 train_loss:3.2820 train_time:1245744ms step_avg:400.95ms step:3118/3242 train_loss:3.4305 train_time:1246139ms step_avg:400.95ms step:3119/3242 train_loss:3.2718 train_time:1246532ms 
step_avg:400.94ms step:3120/3242 train_loss:3.3761 train_time:1246926ms step_avg:400.94ms step:3121/3242 train_loss:3.3596 train_time:1247320ms step_avg:400.94ms step:3122/3242 train_loss:3.2794 train_time:1247715ms step_avg:400.94ms step:3123/3242 train_loss:3.1802 train_time:1248110ms step_avg:400.93ms step:3124/3242 train_loss:3.1640 train_time:1248504ms step_avg:400.93ms step:3125/3242 train_loss:3.2658 train_time:1248898ms step_avg:400.93ms step:3125/3242 val_loss:3.2613 train_time:1248924ms step_avg:400.94ms step:3126/3242 train_loss:3.6730 train_time:1249293ms step_avg:400.93ms step:3127/3242 train_loss:3.2764 train_time:1249688ms step_avg:400.93ms step:3128/3242 train_loss:3.1894 train_time:1250082ms step_avg:400.92ms step:3129/3242 train_loss:3.3656 train_time:1250477ms step_avg:400.92ms step:3130/3242 train_loss:3.3919 train_time:1250871ms step_avg:400.92ms step:3131/3242 train_loss:3.3918 train_time:1251265ms step_avg:400.92ms step:3132/3242 train_loss:3.1114 train_time:1251659ms step_avg:400.92ms step:3133/3242 train_loss:3.2642 train_time:1252053ms step_avg:400.91ms step:3134/3242 train_loss:3.2554 train_time:1252447ms step_avg:400.91ms step:3135/3242 train_loss:3.3476 train_time:1252841ms step_avg:400.91ms step:3136/3242 train_loss:3.2894 train_time:1253236ms step_avg:400.91ms step:3137/3242 train_loss:3.0541 train_time:1253630ms step_avg:400.91ms step:3138/3242 train_loss:3.1308 train_time:1254026ms step_avg:400.90ms step:3139/3242 train_loss:3.2900 train_time:1254422ms step_avg:400.90ms step:3140/3242 train_loss:3.3230 train_time:1254817ms step_avg:400.90ms step:3141/3242 train_loss:3.1056 train_time:1255212ms step_avg:400.90ms step:3142/3242 train_loss:3.3033 train_time:1255607ms step_avg:400.90ms step:3143/3242 train_loss:3.1039 train_time:1256003ms step_avg:400.89ms step:3144/3242 train_loss:3.4719 train_time:1256399ms step_avg:400.89ms step:3145/3242 train_loss:3.6975 train_time:1256795ms step_avg:400.89ms step:3146/3242 train_loss:3.5441 
train_time:1257190ms step_avg:400.89ms step:3147/3242 train_loss:3.2213 train_time:1257586ms step_avg:400.89ms step:3148/3242 train_loss:3.2797 train_time:1257982ms step_avg:400.89ms step:3149/3242 train_loss:3.1947 train_time:1258378ms step_avg:400.88ms step:3150/3242 train_loss:3.3819 train_time:1258773ms step_avg:400.88ms step:3151/3242 train_loss:3.0917 train_time:1259167ms step_avg:400.88ms step:3152/3242 train_loss:3.3165 train_time:1259563ms step_avg:400.88ms step:3153/3242 train_loss:3.4393 train_time:1259958ms step_avg:400.88ms step:3154/3242 train_loss:3.2354 train_time:1260353ms step_avg:400.88ms step:3155/3242 train_loss:3.4336 train_time:1260748ms step_avg:400.87ms step:3156/3242 train_loss:3.8570 train_time:1261143ms step_avg:400.87ms step:3157/3242 train_loss:3.1653 train_time:1261540ms step_avg:400.87ms step:3158/3242 train_loss:3.2599 train_time:1261936ms step_avg:400.87ms step:3159/3242 train_loss:3.3993 train_time:1262332ms step_avg:400.87ms step:3160/3242 train_loss:3.1731 train_time:1262727ms step_avg:400.87ms step:3161/3242 train_loss:3.3228 train_time:1263122ms step_avg:400.86ms step:3162/3242 train_loss:3.0977 train_time:1263518ms step_avg:400.86ms step:3163/3242 train_loss:3.1878 train_time:1263913ms step_avg:400.86ms step:3164/3242 train_loss:3.3057 train_time:1264308ms step_avg:400.86ms step:3165/3242 train_loss:3.3566 train_time:1264704ms step_avg:400.86ms step:3166/3242 train_loss:3.2508 train_time:1265100ms step_avg:400.86ms step:3167/3242 train_loss:3.1986 train_time:1265496ms step_avg:400.85ms step:3168/3242 train_loss:3.3278 train_time:1265891ms step_avg:400.85ms step:3169/3242 train_loss:3.5561 train_time:1266287ms step_avg:400.85ms step:3170/3242 train_loss:3.4776 train_time:1266683ms step_avg:400.85ms step:3171/3242 train_loss:3.3472 train_time:1267079ms step_avg:400.85ms step:3172/3242 train_loss:3.3211 train_time:1267473ms step_avg:400.85ms step:3173/3242 train_loss:3.2749 train_time:1267868ms step_avg:400.84ms step:3174/3242 
train_loss:3.1432 train_time:1268263ms step_avg:400.84ms step:3175/3242 train_loss:3.0602 train_time:1268659ms step_avg:400.84ms step:3176/3242 train_loss:3.0859 train_time:1269055ms step_avg:400.84ms step:3177/3242 train_loss:3.2718 train_time:1269451ms step_avg:400.84ms step:3178/3242 train_loss:3.0242 train_time:1269847ms step_avg:400.84ms step:3179/3242 train_loss:3.2286 train_time:1270243ms step_avg:400.83ms step:3180/3242 train_loss:3.3900 train_time:1270637ms step_avg:400.83ms step:3181/3242 train_loss:3.2727 train_time:1271032ms step_avg:400.83ms step:3182/3242 train_loss:3.3032 train_time:1271427ms step_avg:400.83ms step:3183/3242 train_loss:3.2985 train_time:1271823ms step_avg:400.83ms step:3184/3242 train_loss:3.1466 train_time:1272218ms step_avg:400.82ms step:3185/3242 train_loss:3.2483 train_time:1272613ms step_avg:400.82ms step:3186/3242 train_loss:3.4974 train_time:1273008ms step_avg:400.82ms step:3187/3242 train_loss:3.2527 train_time:1273403ms step_avg:400.82ms step:3188/3242 train_loss:3.2366 train_time:1273797ms step_avg:400.82ms step:3189/3242 train_loss:3.2009 train_time:1274193ms step_avg:400.82ms step:3190/3242 train_loss:3.3799 train_time:1274587ms step_avg:400.81ms step:3191/3242 train_loss:3.3245 train_time:1274983ms step_avg:400.81ms step:3192/3242 train_loss:3.2210 train_time:1275378ms step_avg:400.81ms step:3193/3242 train_loss:3.4267 train_time:1275774ms step_avg:400.81ms step:3194/3242 train_loss:3.3572 train_time:1276168ms step_avg:400.81ms step:3195/3242 train_loss:3.1665 train_time:1276564ms step_avg:400.80ms step:3196/3242 train_loss:3.2897 train_time:1276959ms step_avg:400.80ms step:3197/3242 train_loss:3.2316 train_time:1277353ms step_avg:400.80ms step:3198/3242 train_loss:3.1737 train_time:1277747ms step_avg:400.80ms step:3199/3242 train_loss:3.2136 train_time:1278142ms step_avg:400.80ms step:3200/3242 train_loss:3.1553 train_time:1278537ms step_avg:400.80ms step:3201/3242 train_loss:3.8797 train_time:1278931ms 
step_avg:400.79ms step:3202/3242 train_loss:3.2740 train_time:1279326ms step_avg:400.79ms step:3203/3242 train_loss:3.0636 train_time:1279721ms step_avg:400.79ms step:3204/3242 train_loss:3.2343 train_time:1280114ms step_avg:400.79ms step:3205/3242 train_loss:3.0246 train_time:1280508ms step_avg:400.79ms step:3206/3242 train_loss:3.0847 train_time:1280903ms step_avg:400.78ms step:3207/3242 train_loss:3.0606 train_time:1281297ms step_avg:400.78ms step:3208/3242 train_loss:3.2112 train_time:1281692ms step_avg:400.78ms step:3209/3242 train_loss:3.2836 train_time:1282086ms step_avg:400.78ms step:3210/3242 train_loss:3.1588 train_time:1282481ms step_avg:400.78ms step:3211/3242 train_loss:3.1932 train_time:1282876ms step_avg:400.77ms step:3212/3242 train_loss:3.2719 train_time:1283271ms step_avg:400.77ms step:3213/3242 train_loss:3.4691 train_time:1283823ms step_avg:400.82ms step:3214/3242 train_loss:3.0151 train_time:1284217ms step_avg:400.82ms step:3215/3242 train_loss:3.4280 train_time:1284610ms step_avg:400.81ms step:3216/3242 train_loss:3.3532 train_time:1285005ms step_avg:400.81ms step:3217/3242 train_loss:3.4145 train_time:1285400ms step_avg:400.81ms step:3218/3242 train_loss:3.1176 train_time:1285794ms step_avg:400.81ms step:3219/3242 train_loss:3.0850 train_time:1286188ms step_avg:400.81ms step:3220/3242 train_loss:3.1353 train_time:1286583ms step_avg:400.80ms step:3221/3242 train_loss:3.5615 train_time:1286978ms step_avg:400.80ms step:3222/3242 train_loss:3.0247 train_time:1287373ms step_avg:400.80ms step:3223/3242 train_loss:3.2331 train_time:1287768ms step_avg:400.80ms step:3224/3242 train_loss:3.1439 train_time:1288162ms step_avg:400.80ms step:3225/3242 train_loss:3.2136 train_time:1288556ms step_avg:400.80ms step:3226/3242 train_loss:3.2299 train_time:1288949ms step_avg:400.79ms step:3227/3242 train_loss:3.4030 train_time:1289344ms step_avg:400.79ms step:3228/3242 train_loss:3.3197 train_time:1289739ms step_avg:400.79ms step:3229/3242 train_loss:3.2296 
train_time:1290134ms step_avg:400.79ms step:3230/3242 train_loss:3.1716 train_time:1290718ms step_avg:400.84ms step:3231/3242 train_loss:3.1359 train_time:1291122ms step_avg:400.84ms step:3232/3242 train_loss:3.0979 train_time:1291517ms step_avg:400.84ms step:3233/3242 train_loss:3.2401 train_time:1291912ms step_avg:400.84ms step:3234/3242 train_loss:3.0975 train_time:1292308ms step_avg:400.84ms step:3235/3242 train_loss:3.1627 train_time:1292703ms step_avg:400.84ms step:3236/3242 train_loss:3.2757 train_time:1293099ms step_avg:400.84ms step:3237/3242 train_loss:3.0903 train_time:1293494ms step_avg:400.83ms step:3238/3242 train_loss:3.0001 train_time:1293889ms step_avg:400.83ms step:3239/3242 train_loss:3.3200 train_time:1294476ms step_avg:400.89ms step:3240/3242 train_loss:3.9170 train_time:1294870ms step_avg:400.89ms step:3241/3242 train_loss:3.2893 train_time:1295265ms step_avg:400.89ms step:3242/3242 train_loss:3.1107 train_time:1295660ms step_avg:400.88ms step:3242/3242 val_loss:3.2484 train_time:1295686ms step_avg:400.89ms