# ====================================================================================================
# (the row of '=' above is the run log's separator; the training script that the run
#  dumped into its own log follows)
import os
import sys
with open(sys.argv[0]) as f:
    code = f.read() # read the code of this file ASAP, for logging
import uuid
import glob
import time
from dataclasses import dataclass

import numpy as np
import torch
from torch import nn
import torch.nn.functional as F
import torch.distributed as dist
import torch._inductor.config as config
from torch.nn.parallel import DistributedDataParallel as DDP

# -----------------------------------------------------------------------------
# Muon optimizer

def zeropower_via_svd(G, steps=None):
    """
    Exact orthogonalization of G: for the SVD G = U S V^T, return U V^T.

    `steps` is accepted and ignored so this backend can be called exactly like
    the iterative one.
    """
    # torch.linalg.svd returns V^H directly; full_matrices=False is the reduced
    # SVD, which is what the deprecated Tensor.svd() computed by default.
    U, S, Vh = torch.linalg.svd(G, full_matrices=False)
    return U @ Vh

@torch.compile
def zeropower_via_newtonschulz5(G, steps=10, eps=1e-7):
    r"""
    Newton-Schulz iteration to compute the zeroth power / orthogonalization of G. We opt to use a
    quintic iteration whose coefficients are selected to maximize the slope at zero. For the purpose
    of minimizing steps, it turns out to be empirically effective to keep increasing the slope at
    zero even beyond the point where the iteration no longer converges all the way to one everywhere
    on the interval. This iteration therefore does not produce UV^T but rather something like US'V^T
    where S' is diagonal with S_{ii}' \sim Uniform(0.5, 1.5), which turns out not to hurt model
    performance at all relative to UV^T, where USV^T = G is the SVD.

    Arguments:
        G: 2D tensor to orthogonalize (asserted).
        steps: number of quintic iterations to run.
        eps: added to the norm before dividing, so an all-zero G does not divide by zero.

    Returns a bfloat16 tensor with the same shape as G.
    """
    assert len(G.shape) == 2
    a, b, c = (3.4445, -4.7750, 2.0315)
    X = G.bfloat16()
    X /= (X.norm() + eps) # ensure top singular value <= 1
    # iterate on the wide orientation so that X @ X.T is the smaller Gram matrix
    tall = G.size(0) > G.size(1)
    if tall:
        X = X.T
    for _ in range(steps):
        A = X @ X.T
        B = A @ X
        X = a * X + b * B + c * A @ B
    if tall:
        X = X.T
    return X

zeropower_backends = dict(svd=zeropower_via_svd, newtonschulz5=zeropower_via_newtonschulz5)

class Muon(torch.optim.Optimizer):
    """
    Muon - MomentUm Orthogonalized by Newton-schulz

    Muon internally runs standard SGD-momentum, and then performs an orthogonalization post-
    processing step, in which each 2D parameter's update is replaced with the nearest orthogonal
    matrix. To efficiently orthogonalize each update, we use a Newton-Schulz iteration, which has
    the advantage that it can be stably run in bfloat16 on the GPU.

    Some warnings:
    - This optimizer assumes that all parameters passed in are 2D.
    - It should not be used for the embedding layer, the final fully connected layer, or any {0,1}-D
    parameters; those should all be optimized by a standard method (e.g., AdamW).
    - To use it with 4D convolutional filters, it works well to just flatten their last 3 dimensions.
    - We believe it is unlikely to work well for training with small batch size.
    - We believe it may not work well for finetuning pretrained models, but we haven't tested this.
    - We have not yet tried this optimizer for training scenarios larger than NanoGPT (124M).

    Arguments:
        lr: The learning rate used by the internal SGD.
        momentum: The momentum used by the internal SGD.
        nesterov: Whether to use Nesterov-style momentum in the internal SGD. (recommended)
        backend: The chosen backend for the orthogonalization step. (recommended: 'newtonschulz5')
        backend_steps: The number of iteration steps to use in the backend, if it is iterative.
    """
    def __init__(self, params, lr=0.02, momentum=0.95, nesterov=True, backend='newtonschulz5', backend_steps=5):
        defaults = dict(lr=lr, momentum=momentum, nesterov=nesterov, backend=backend, backend_steps=backend_steps)
        super().__init__(params, defaults)

    def step(self):
        """
        Apply one Muon update to every parameter group.

        The work is sharded across ranks: parameter i of a group is processed only by the rank
        with i % WORLD_SIZE == RANK. Each rank writes its updates into a flat zero-initialized
        buffer, a SUM all-reduce assembles the full set on every rank, and the updates are then
        applied with `p.data.add_(update, alpha=-lr)`.

        Requires the WORLD_SIZE and RANK environment variables and an initialized process group.
        """
        # identical for every parameter, so parse the environment once per step
        world_size = int(os.environ['WORLD_SIZE'])
        rank = int(os.environ['RANK'])

        for group in self.param_groups:
            params = group['params']
            if not params:
                continue # nothing to update (every rank holds the same groups, so all skip together)
            lr = group['lr']
            momentum = group['momentum']
            zeropower_backend = zeropower_backends[group['backend']]

            # generate weight updates in distributed fashion
            total_params = sum(p.numel() for p in params)
            # allocate next to the parameters instead of on whatever the current 'cuda' device is
            updates_flat = torch.zeros(total_params, device=params[0].device, dtype=torch.bfloat16)
            curr_idx = 0
            for i, p in enumerate(params):
                # luckily this will perfectly distribute a transformer with multiple of 4 layers to 8 GPUs
                if i % world_size == rank:
                    g = p.grad
                    assert g is not None
                    state = self.state[p]
                    if 'momentum_buffer' not in state:
                        state['momentum_buffer'] = torch.zeros_like(g)
                    buf = state['momentum_buffer']
                    buf.mul_(momentum).add_(g)
                    if group['nesterov']:
                        g = g.add(buf, alpha=momentum)
                    g = zeropower_backend(g, steps=group['backend_steps'])
                    g *= max(1, g.size(0)/g.size(1))**0.5 # scale up updates of tall matrices
                    updates_flat[curr_idx:curr_idx+p.numel()] = g.flatten()
                # the offset advances for every parameter so all ranks agree on the layout
                curr_idx += p.numel()

            # sync updates across devices. we are not memory-constrained so can do this simple deserialization
            dist.all_reduce(updates_flat, op=dist.ReduceOp.SUM)

            # deserialize and apply updates
            curr_idx = 0
            for p in params:
                g = updates_flat[curr_idx:curr_idx+p.numel()].view_as(p.data).type_as(p.data)
                p.data.add_(g, alpha=-lr)
                curr_idx += p.numel()

# -----------------------------------------------------------------------------
# PyTorch nn.Module definitions for the GPT-2 model

class Rotary(torch.nn.Module):
    """Produces (and caches per sequence length) the cos/sin tables for rotary embeddings."""

    def __init__(self, dim, base=10000):
        super().__init__()
        # plain tensor attribute (not a registered buffer), so forward() moves results to x.device itself
        self.inv_freq = 1.0 / (base ** (torch.arange(0, dim, 2).float() / dim))
        self.seq_len_cached = None
        self.cos_cached = None
        self.sin_cached = None

    def forward(self, x):
        """Return (cos, sin), each indexed as [None, :, None, :], for the sequence length x.shape[1]."""
        seq_len = x.shape[1]
        if seq_len != self.seq_len_cached: # rebuild only when the sequence length changes
            self.seq_len_cached = seq_len
            t = torch.arange(seq_len, device=x.device).type_as(self.inv_freq)
            freqs = torch.outer(t, self.inv_freq).to(x.device)
            self.cos_cached = freqs.cos().bfloat16()
            self.sin_cached = freqs.sin().bfloat16()
        return self.cos_cached[None, :, None, :], self.sin_cached[None, :, None, :]

def apply_rotary_emb(x, cos, sin):
    """Rotate the two halves of the last dimension of a 4D tensor x by (cos, sin); result has x's dtype."""
    assert x.ndim == 4 # multihead attention
    d = x.shape[3]//2
    x1 = x[..., :d]
    x2 = x[..., d:]
    y1 = x1 * cos + x2 * sin
    y2 = x1 * (-sin) + x2 * cos
    return torch.cat([y1, y2], 3).type_as(x)

class CausalSelfAttention(nn.Module):
    """Causal multi-head self-attention with QK RMS-norm, rotary embeddings and a learned mix with the first block's values."""

    def __init__(self, config):
        super().__init__()
        self.n_head = config.n_head
        self.n_embd = config.n_embd
        self.head_dim = self.n_embd // self.n_head
        assert self.n_embd % self.n_head == 0
        self.c_q = nn.Linear(self.n_embd, self.n_embd, bias=False)
        self.c_k = nn.Linear(self.n_embd, self.n_embd, bias=False)
        self.c_v = nn.Linear(self.n_embd, self.n_embd, bias=False)
        # output projection
        self.c_proj = nn.Linear(self.n_embd, self.n_embd, bias=False)
        self.c_proj.weight.data.zero_() # zero init suggested by @Grad62304977
        self.rotary = Rotary(self.head_dim)
        self.lamb = nn.Parameter(torch.tensor(0.5)) # @Grad62304977

    def forward(self, x, v1=None):
        """
        Arguments:
            x: input of size (B, T, C).
            v1: value tensor of the first block, or None when this is the first block.

        Returns (y, v1): the attention output shaped like x, and the v1 to hand to later blocks.
        """
        B, T, C = x.size() # batch size, sequence length, embedding dimensionality (n_embd)
        q = self.c_q(x).view(B, T, self.n_head, self.head_dim)
        k = self.c_k(x).view(B, T, self.n_head, self.head_dim)
        v = self.c_v(x).view(B, T, self.n_head, self.head_dim)
        if v1 is None:
            v1 = v # This happens if we are in the first block. v needs to be accessed by subsequent blocks
        v = (1 - self.lamb) * v + self.lamb * v1.view_as(v) # @Grad62304977
        cos, sin = self.rotary(q)
        q, k = F.rms_norm(q, (q.size(-1),)), F.rms_norm(k, (k.size(-1),)) # QK norm suggested by @Grad62304977
        q, k = apply_rotary_emb(q, cos, sin), apply_rotary_emb(k, cos, sin)
        y = F.scaled_dot_product_attention(q.transpose(1, 2), k.transpose(1, 2), v.transpose(1, 2), is_causal=True)
        y = y.transpose(1, 2).contiguous().view_as(x) # re-assemble all head outputs side by side
        y = self.c_proj(y)
        return y, v1

class MLP(nn.Module):
    """Bias-free two-layer feed-forward network (4x expansion) with a squared-ReLU activation."""

    def __init__(self, config):
        super().__init__()
        self.c_fc = nn.Linear(config.n_embd, 4 * config.n_embd, bias=False)
        self.c_proj = nn.Linear(4 * config.n_embd, config.n_embd, bias=False)
        self.c_proj.weight.data.zero_() # zero init suggested by @Grad62304977

    def forward(self, x):
        x = self.c_fc(x)
        x = F.relu(x).square() # https://arxiv.org/abs/2109.08668v2; ~1-2% better than GELU; suggested by @SKYLINEZ007 and @Grad62304977
        x = self.c_proj(x)
        return x

class Block(nn.Module):
    """Transformer block: learned mix of x with the embedding x0, then pre-norm attention and MLP residuals."""

    def __init__(self, config):
        super().__init__()
        self.attn = CausalSelfAttention(config)
        self.mlp = MLP(config)
        # weights of (x, x0); initialized so the block starts out ignoring x0
        self.lambdas = nn.Parameter(torch.tensor([1., 0.]))

    def forward(self, x, v1, x0):
        """Return (x, v1): the block output and the first-block values passed through attention."""
        x = self.lambdas[0] * x + self.lambdas[1] * x0
        x1, v1 = self.attn(F.rms_norm(x, (x.size(-1),)), v1)
        x = x + x1
        x = x + self.mlp(F.rms_norm(x, (x.size(-1),)))
        return x, v1

# -----------------------------------------------------------------------------
# The main GPT-2 model

@dataclass
class GPTConfig:
    vocab_size : int = 50304
    n_layer : int = 12
    n_head : int = 6 # head dim 128 suggested by @Grad62304977
    n_embd : int = 768

class GPT(nn.Module):
    """Token embedding -> n_layer Blocks -> RMS-norm -> zero-initialized linear head."""

    def __init__(self, config):
        super().__init__()
        self.config = config

        self.transformer = nn.ModuleDict(dict(
            wte = nn.Embedding(config.vocab_size, config.n_embd),
            h = nn.ModuleList([Block(config) for _ in range(config.n_layer)]),
        ))
        self.lm_head = nn.Linear(config.n_embd, config.vocab_size, bias=False)
        self.lm_head.weight.data.zero_() # @Grad62304977

    def forward(self, idx, targets=None, return_logits=True):
        """
        Arguments:
            idx: token ids.
            targets: optional target ids; when given, the cross-entropy loss is computed
                (entries equal to -1 are ignored).
            return_logits: when False, None is returned in place of the logits.

        Returns (logits, loss). Without targets only the last position is projected to logits
        and loss is None.
        """
        # forward the GPT model itself
        x = self.transformer.wte(idx) # token embeddings of shape (b, t, n_embd)
        x = F.rms_norm(x, (x.size(-1),)) # @Grad62304977
        x0 = x
        v1 = None
        for block in self.transformer.h:
            x, v1 = block(x, v1, x0)
        x = F.rms_norm(x, (x.size(-1),))

        if targets is not None:
            # if we are given some desired targets also calculate the loss
            logits = self.lm_head(x)
            logits = logits.float() # use tf32/fp32 for logits
            loss = F.cross_entropy(logits.view(-1, logits.size(-1)), targets.view(-1), ignore_index=-1)
        else:
            # inference-time mini-optimization: only forward the lm_head on the very last position
            logits = self.lm_head(x[:, [-1], :]) # note: using list [-1] to preserve the time dim
            logits = logits.float() # use tf32/fp32 for logits
            loss = None

        # there are performance reasons why not returning logits is prudent, if not needed
        if not return_logits:
            logits = None

        return logits, loss

# -----------------------------------------------------------------------------
# Our own simple Distributed Data Loader

def _peek_data_shard(filename):
    """Read only the header of a .bin shard and return the token count it claims (header[2])."""
    # only reads the header, returns header data
    with open(filename, "rb") as f:
        # first read the header, which is 256 int32 integers (4 bytes each)
        header = np.frombuffer(f.read(256*4), dtype=np.int32)
    if header[0] != 20240520:
        print("ERROR: magic number mismatch in the data .bin file!")
        print("---> HINT: Are you passing in a correct file with --input_bin?")
        print("---> HINT: Dataset encoding changed recently, re-run data prepro or refer again to README")
        print("---> HINT: For example re-run: `python dev/data/tinyshakespeare.py`, then re-try")
        # sys.exit rather than the builtin exit(), which only exists when the site module is loaded
        sys.exit(1)
    assert header[1] == 1, "unsupported version"
    ntok = header[2] # number of tokens (claimed)
    return ntok # for now just return the number of tokens

def _load_data_shard(filename):
    """Load a whole .bin shard and return its tokens as a uint16 numpy array, validating the header."""
    with open(filename, "rb") as f:
        # first read the header, which is 256 int32 integers (4 bytes each)
        header = np.frombuffer(f.read(256*4), dtype=np.int32)
        assert header[0] == 20240520, "magic number mismatch in the data .bin file"
        assert header[1] == 1, "unsupported version"
        ntok = header[2] # number of tokens (claimed)
        # the rest of it are tokens, stored as uint16
        tokens = np.frombuffer(f.read(), dtype=np.uint16)
    assert len(tokens) == ntok, "number of tokens read does not match header?"
    return tokens

class DistributedDataLoader:
    """
    Serves (B, T) input/target batches from the sorted shards matching a glob pattern.

    Within a shard, rank r starts at offset r*B*T and every call advances the position by
    B*T*num_processes. Shards are visited cyclically.
    """

    def __init__(self, filename_pattern, B, T, process_rank, num_processes):
        self.process_rank = process_rank
        self.num_processes = num_processes
        self.B = B
        self.T = T

        # glob files that match the pattern
        self.files = sorted(glob.glob(filename_pattern))
        assert len(self.files) > 0, f"did not find any files that match the pattern {filename_pattern}"

        # load and validate all data shards, count number of tokens in total
        ntok_total = 0
        for fname in self.files:
            shard_ntok = _peek_data_shard(fname)
            assert shard_ntok >= num_processes * B * T + 1
            ntok_total += int(shard_ntok)
        self.ntok_total = ntok_total

        # kick things off
        self.reset()

    def _load_current_shard(self):
        # shared by reset() and advance(): rewind to this rank's first offset and load the shard
        self.current_position = self.process_rank * self.B * self.T
        self.tokens = _load_data_shard(self.files[self.current_shard])

    def reset(self):
        """Go back to the first shard."""
        self.current_shard = 0
        self._load_current_shard()

    def advance(self): # advance to next data shard
        self.current_shard = (self.current_shard + 1) % len(self.files)
        self._load_current_shard()

    def next_batch(self):
        """Return (x, y) as CUDA int64 tensors of shape (B, T); y is x shifted by one token."""
        B = self.B
        T = self.T
        buf = self.tokens[self.current_position : self.current_position+B*T+1]
        buf = torch.tensor(buf.astype(np.int32), dtype=torch.long)
        x = (buf[:-1]).view(B, T) # inputs
        y = (buf[1:]).view(B, T) # targets
        # advance current position and load next shard if necessary
        self.current_position += B * T * self.num_processes
        if self.current_position + (B * T * self.num_processes + 1) > len(self.tokens):
            self.advance()
        return x.cuda(), y.cuda()

# -----------------------------------------------------------------------------
# int main

@dataclass
class Hyperparameters:
    # data hyperparams
    input_bin : str = 'data/fineweb10B/fineweb_train_*.bin' # input .bin to train on
    input_val_bin : str = 'data/fineweb10B/fineweb_val_*.bin' # input .bin to eval validation loss on
    # optimization hyperparams
    batch_size : int = 8*64 # batch size, in sequences, across all devices
    device_batch_size : int = 64 # batch size, in sequences, per device
    sequence_length : int = 1024 # sequence length, in tokens
    num_iterations : int = 3200 # number of iterations to run
    warmup_iters : int = 0
    warmdown_iters : int = 914 # number of iterations of linear warmup/warmdown for triangular or trapezoidal schedule
    weight_decay : float = 0
    # evaluation and logging hyperparams
    val_loss_every : int = 125 # every how many steps to evaluate val loss? 0 for only at the end
    val_tokens : int = 10485760 # how many tokens of validation data? it's important to keep this fixed for consistent comparisons
    save_every : int = 0 # every how many steps to save the checkpoint? 0 for only at the end
args = Hyperparameters()

# set up DDP (distributed data parallel). torchrun sets this env variable
assert torch.cuda.is_available()
dist.init_process_group(backend='nccl')
ddp_rank = int(os.environ['RANK'])
ddp_local_rank = int(os.environ['LOCAL_RANK'])
ddp_world_size = int(os.environ['WORLD_SIZE'])
device = f'cuda:{ddp_local_rank}'
torch.cuda.set_device(device)
print(f"using device: {device}")
master_process = (ddp_rank == 0) # this process will do logging, checkpointing etc.

# convenience variables
B, T = args.device_batch_size, args.sequence_length
# calculate the number of steps to take in the val loop.
assert args.val_tokens % (B * T * ddp_world_size) == 0
val_steps = args.val_tokens // (B * T * ddp_world_size)
# calculate the steps of gradient accumulation required to attain the desired global batch size.
assert args.batch_size % (B * ddp_world_size) == 0
train_accumulation_steps = args.batch_size // (B * ddp_world_size)

# load tokens
train_loader = DistributedDataLoader(args.input_bin, B, T, ddp_rank, ddp_world_size)
val_loader = DistributedDataLoader(args.input_val_bin, B, T, ddp_rank, ddp_world_size)
if master_process:
    print(f"Training DataLoader: total number of tokens: {train_loader.ntok_total} across {len(train_loader.files)} files")
    print(f"Validation DataLoader: total number of tokens: {val_loader.ntok_total} across {len(val_loader.files)} files")
x, y = train_loader.next_batch()

# there are only 50257 unique GPT-2 tokens; we extend to nearest multiple of 128 for efficiency. suggested to me by @Grad62304977.
# this originates from Karpathy's experiments.
num_vocab = 50304
model = GPT(GPTConfig(vocab_size=num_vocab, n_layer=12, n_head=6, n_embd=768))
model = model.cuda()
if hasattr(config, "coordinate_descent_tuning"):
    config.coordinate_descent_tuning = True # suggested by @Chillee
model = torch.compile(model)
# here we wrap model into DDP container
model = DDP(model, device_ids=[ddp_local_rank])
raw_model = model.module # always contains the "raw" unwrapped model
ctx = torch.amp.autocast(device_type='cuda', dtype=torch.bfloat16)

# CUDNN attention is ~4ms faster than Flash, but doesn't get selected by default in PyTorch 2.5.1
from torch.backends.cuda import enable_cudnn_sdp, enable_flash_sdp, enable_math_sdp, enable_mem_efficient_sdp
enable_cudnn_sdp(True)
enable_flash_sdp(False)
enable_mem_efficient_sdp(False)
enable_math_sdp(False)

# init the optimizer(s)
# embedding and head each get their own Adam; the blocks' 2D matrices go to Muon and
# their remaining (<2D) parameters to a fourth Adam
optimizer1 = torch.optim.Adam([raw_model.transformer.wte.weight], lr=0.3, betas=(0.9, 0.95), fused=True)
optimizer2 = torch.optim.Adam([raw_model.lm_head.weight], lr=0.002, betas=(0.9, 0.95), fused=True)
params = list(raw_model.transformer.h.parameters())
matrix_params = [p for p in params if p.ndim == 2]
scalar_params = [p for p in params if p.ndim < 2]
optimizer3 = Muon(matrix_params, lr=0.02, momentum=0.95)
optimizer4 = torch.optim.Adam(scalar_params, lr=0.02, betas=(0.9, 0.95), fused=True) # note that this learning rate is neither sensitive nor tuned
optimizers = [optimizer1, optimizer2, optimizer3, optimizer4]
# learning rate decay scheduler (linear warmup and warmdown)
def get_lr(it):
    """
    Multiplier applied to each optimizer's base lr at iteration `it`.

    Linear warmup over the first `warmup_iters` iterations, 1.0 in between, and a linear
    warmdown to 0 over the last `warmdown_iters` iterations.
    """
    assert it <= args.num_iterations
    # 1) linear warmup for warmup_iters steps
    if it < args.warmup_iters:
        return (it+1) / args.warmup_iters
    # 2) constant lr for a while
    if it < args.num_iterations - args.warmdown_iters:
        return 1.0
    # with no warmdown phase only it == num_iterations reaches this point (the scheduler
    # is stepped once after the last update); dividing by warmdown_iters would raise
    # ZeroDivisionError there, so keep the constant multiplier
    if args.warmdown_iters <= 0:
        return 1.0
    # 3) linear warmdown
    decay_ratio = (args.num_iterations - it) / args.warmdown_iters
    return decay_ratio
schedulers = [torch.optim.lr_scheduler.LambdaLR(opt, get_lr) for opt in optimizers]

# begin logging
if master_process:
    run_id = str(uuid.uuid4())
    logdir = 'logs/%s/' % run_id
    os.makedirs(logdir, exist_ok=True)
    logfile = 'logs/%s.txt' % run_id
    # create the log file
    with open(logfile, "w") as f:
        # begin the log by printing this file (the Python code)
        f.write('='*100 + '\n')
        f.write(code)
        f.write('='*100 + '\n')
        # log information about the hardware/software environment this is running on
        # and print the full `nvidia-smi` to file
        f.write(f"Running pytorch {torch.version.__version__} compiled for CUDA {torch.version.cuda}\nnvidia-smi:\n")
        import subprocess
        result = subprocess.run(['nvidia-smi'], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
        f.write(f'{result.stdout}\n')
        f.write('='*100 + '\n')

training_time_ms = 0
# start the clock
torch.cuda.synchronize()
t0 = time.time()
# begin training
# NOTE(review): x, y were already fetched above and are consumed by step 0; after this reset
# the loader starts again from its first position, so that batch appears to be served twice.
# Left as is to keep runs comparable - confirm whether this is intended.
train_loader.reset()
for step in range(args.num_iterations + 1):
    last_step = (step == args.num_iterations)
    # This effectively ignores timing first 10 steps, which are slower for weird reasons.
    # Alternately, and slightly more correctly in terms of benchmarking, we could do 10
    # steps with dummy data first, and then re-initialize the model and reset the loader.
    if step == 10:
        training_time_ms = 0
        t0 = time.time()
    timed_steps = float('nan') if step <= 11 else (step - 10) + 1 # <= 11 to avoid bug in val

    # once in a while evaluate the validation dataset
    if (last_step or (args.val_loss_every > 0 and step % args.val_loss_every == 0)):
        # stop the clock
        torch.cuda.synchronize()
        training_time_ms += 1000 * (time.time() - t0)
        # run validation batches
        model.eval()
        val_loader.reset()
        val_loss = 0.0
        for _ in range(val_steps):
            x_val, y_val = val_loader.next_batch()
            with ctx: # of course, we'd like to use no_grad() here too, but that creates a torch.compile error for some reason
                _, loss = model(x_val, y_val, return_logits=False)
                val_loss += loss.detach()
                del loss
        dist.all_reduce(val_loss, op=dist.ReduceOp.AVG)
        val_loss /= val_steps
        # log val loss to console and to logfile
        if master_process:
            # format once; the console and the logfile receive the same text
            val_msg = f'step:{step}/{args.num_iterations} val_loss:{val_loss:.4f} train_time:{training_time_ms:.0f}ms step_avg:{training_time_ms/(timed_steps-1):.2f}ms'
            print(val_msg)
            with open(logfile, "a") as f:
                f.write(val_msg + '\n')
        # start the clock again
        torch.cuda.synchronize()
        t0 = time.time()

    if master_process and (last_step or (args.save_every > 0 and step % args.save_every == 0)):
        # stop the clock
        torch.cuda.synchronize()
        training_time_ms += 1000 * (time.time() - t0)
        # save the state of the training process
        log = dict(step=step, code=code, model=raw_model.state_dict(), optimizers=[opt.state_dict() for opt in optimizers])
        torch.save(log, 'logs/%s/state_step%06d.pt' % (run_id, step))
        # start the clock again
        torch.cuda.synchronize()
        t0 = time.time()

    # bit confusing: we want to make sure to eval on 0th iteration
    # but also after the very last iteration. so we loop for step <= num_iterations
    # instead of just < num_iterations (one extra due to <=), only to do
    # the validation/sampling one last time, and then we break right here as we're done.
    if last_step:
        break

    # --------------- TRAINING SECTION BEGIN -----------------
    model.train()
    for i in range(1, train_accumulation_steps+1):
        # forward pass
        with ctx:
            _, loss = model(x, y, return_logits=False)
            train_loss = loss.detach()
        # advance the dataset for the next batch
        x, y = train_loader.next_batch()
        # backward pass
        if i < train_accumulation_steps:
            with model.no_sync(): # there's no need to sync gradients every accumulation step
                loss.backward()
        else:
            loss.backward() # just sync on the last step
    # turn the gradients summed over the accumulation steps into their mean
    for p in model.parameters():
        p.grad /= train_accumulation_steps
    # momentum warmup for Muon
    frac = min(step/500, 1)
    optimizer3.param_groups[0]['momentum'] = (1 - frac) * 0.85 + frac * 0.95
    # step the optimizers and schedulers
    for opt, sched in zip(optimizers, schedulers):
        opt.step()
        sched.step()
    # null the gradients
    model.zero_grad(set_to_none=True)
    # --------------- TRAINING SECTION END -------------------
    # everything that follows now is just diagnostics, prints, logging, etc.
#dist.all_reduce(train_loss, op=dist.ReduceOp.AVG) # all-reducing the training loss would be more correct in terms of logging, but slower if master_process: approx_time = training_time_ms + 1000 * (time.time() - t0) print(f"step:{step+1}/{args.num_iterations} train_loss:{train_loss.item():.4f} train_time:{approx_time:.0f}ms step_avg:{approx_time/timed_steps:.2f}ms") with open(logfile, "a") as f: f.write(f"step:{step+1}/{args.num_iterations} train_loss:{train_loss.item():.4f} train_time:{approx_time:.0f}ms step_avg:{approx_time/timed_steps:.2f}ms\n") if master_process: print(f"peak memory consumption: {torch.cuda.max_memory_allocated() // 1024 // 1024} MiB") # ------------------------------------------------------------------------- # clean up nice dist.destroy_process_group() ==================================================================================================== Running pytorch 2.5.1+cu124 compiled for CUDA 12.4 nvidia-smi: Wed Nov 6 20:25:19 2024 +-----------------------------------------------------------------------------------------+ | NVIDIA-SMI 555.42.06 Driver Version: 555.42.06 CUDA Version: 12.5 | |-----------------------------------------+------------------------+----------------------+ | GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC | | Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. | | | | MIG M. 
| |=========================================+========================+======================| | 0 NVIDIA H100 80GB HBM3 Off | 00000000:18:00.0 Off | 0 | | N/A 35C P0 141W / 700W | 5304MiB / 81559MiB | 2% Default | | | | Disabled | +-----------------------------------------+------------------------+----------------------+ | 1 NVIDIA H100 80GB HBM3 Off | 00000000:2A:00.0 Off | 0 | | N/A 37C P0 133W / 700W | 5352MiB / 81559MiB | 3% Default | | | | Disabled | +-----------------------------------------+------------------------+----------------------+ | 2 NVIDIA H100 80GB HBM3 Off | 00000000:3A:00.0 Off | 0 | | N/A 38C P0 127W / 700W | 5352MiB / 81559MiB | 3% Default | | | | Disabled | +-----------------------------------------+------------------------+----------------------+ | 3 NVIDIA H100 80GB HBM3 Off | 00000000:5D:00.0 Off | 0 | | N/A 34C P0 137W / 700W | 5352MiB / 81559MiB | 2% Default | | | | Disabled | +-----------------------------------------+------------------------+----------------------+ | 4 NVIDIA H100 80GB HBM3 Off | 00000000:9A:00.0 Off | 0 | | N/A 35C P0 142W / 700W | 5352MiB / 81559MiB | 5% Default | | | | Disabled | +-----------------------------------------+------------------------+----------------------+ | 5 NVIDIA H100 80GB HBM3 Off | 00000000:AB:00.0 Off | 0 | | N/A 39C P0 144W / 700W | 5352MiB / 81559MiB | 7% Default | | | | Disabled | +-----------------------------------------+------------------------+----------------------+ | 6 NVIDIA H100 80GB HBM3 Off | 00000000:BA:00.0 Off | 0 | | N/A 37C P0 143W / 700W | 5352MiB / 81559MiB | 5% Default | | | | Disabled | +-----------------------------------------+------------------------+----------------------+ | 7 NVIDIA H100 80GB HBM3 Off | 00000000:DB:00.0 Off | 0 | | N/A 35C P0 148W / 700W | 5112MiB / 81559MiB | 4% Default | | | | Disabled | +-----------------------------------------+------------------------+----------------------+ 
+-----------------------------------------------------------------------------------------+ | Processes: | | GPU GI CI PID Type Process name GPU Memory | | ID ID Usage | |=========================================================================================| | 0 N/A N/A 35444 C /usr/bin/python3 0MiB | | 1 N/A N/A 35445 C /usr/bin/python3 0MiB | | 2 N/A N/A 35446 C /usr/bin/python3 0MiB | | 3 N/A N/A 35447 C /usr/bin/python3 0MiB | | 4 N/A N/A 35448 C /usr/bin/python3 0MiB | | 5 N/A N/A 35449 C /usr/bin/python3 0MiB | | 6 N/A N/A 35450 C /usr/bin/python3 0MiB | | 7 N/A N/A 35451 C /usr/bin/python3 0MiB | +-----------------------------------------------------------------------------------------+ ==================================================================================================== step:0/3200 val_loss:10.8258 train_time:363ms step_avg:nanms step:1/3200 train_loss:10.8258 train_time:8179ms step_avg:nanms step:2/3200 train_loss:10.4288 train_time:8284ms step_avg:nanms step:3/3200 train_loss:9.9617 train_time:8432ms step_avg:nanms step:4/3200 train_loss:9.0538 train_time:8579ms step_avg:nanms step:5/3200 train_loss:8.0521 train_time:8726ms step_avg:nanms step:6/3200 train_loss:7.5091 train_time:8874ms step_avg:nanms step:7/3200 train_loss:6.9906 train_time:9021ms step_avg:nanms step:8/3200 train_loss:7.2549 train_time:9172ms step_avg:nanms step:9/3200 train_loss:6.9185 train_time:9327ms step_avg:nanms step:10/3200 train_loss:6.8477 train_time:9479ms step_avg:nanms step:11/3200 train_loss:6.6934 train_time:102ms step_avg:nanms step:12/3200 train_loss:6.6749 train_time:252ms step_avg:nanms step:13/3200 train_loss:6.5421 train_time:400ms step_avg:133.38ms step:14/3200 train_loss:6.4687 train_time:550ms step_avg:137.52ms step:15/3200 train_loss:6.4516 train_time:701ms step_avg:140.18ms step:16/3200 train_loss:6.4102 train_time:855ms step_avg:142.48ms step:17/3200 train_loss:6.4078 train_time:1006ms step_avg:143.66ms step:18/3200 train_loss:6.4579 
train_time:1156ms step_avg:144.47ms step:19/3200 train_loss:6.2983 train_time:1306ms step_avg:145.06ms step:20/3200 train_loss:6.3181 train_time:1455ms step_avg:145.47ms step:21/3200 train_loss:6.0253 train_time:1604ms step_avg:145.78ms step:22/3200 train_loss:6.3410 train_time:1756ms step_avg:146.34ms step:23/3200 train_loss:6.5699 train_time:1907ms step_avg:146.66ms step:24/3200 train_loss:6.2329 train_time:2057ms step_avg:146.96ms step:25/3200 train_loss:6.3758 train_time:2208ms step_avg:147.19ms step:26/3200 train_loss:6.0925 train_time:2360ms step_avg:147.49ms step:27/3200 train_loss:6.0129 train_time:2507ms step_avg:147.47ms step:28/3200 train_loss:6.1853 train_time:2657ms step_avg:147.61ms step:29/3200 train_loss:5.8606 train_time:2807ms step_avg:147.75ms step:30/3200 train_loss:6.1140 train_time:2957ms step_avg:147.87ms step:31/3200 train_loss:5.9525 train_time:3108ms step_avg:147.99ms step:32/3200 train_loss:5.9111 train_time:3258ms step_avg:148.07ms step:33/3200 train_loss:5.7654 train_time:3410ms step_avg:148.25ms step:34/3200 train_loss:6.0366 train_time:3560ms step_avg:148.32ms step:35/3200 train_loss:5.9739 train_time:3711ms step_avg:148.46ms step:36/3200 train_loss:6.1071 train_time:3860ms step_avg:148.46ms step:37/3200 train_loss:6.0327 train_time:4011ms step_avg:148.57ms step:38/3200 train_loss:5.9370 train_time:4161ms step_avg:148.61ms step:39/3200 train_loss:5.8144 train_time:4313ms step_avg:148.73ms step:40/3200 train_loss:5.8386 train_time:4462ms step_avg:148.75ms step:41/3200 train_loss:5.7573 train_time:4615ms step_avg:148.86ms step:42/3200 train_loss:5.7615 train_time:4764ms step_avg:148.87ms step:43/3200 train_loss:5.6608 train_time:4915ms step_avg:148.95ms step:44/3200 train_loss:5.7371 train_time:5064ms step_avg:148.95ms step:45/3200 train_loss:5.7333 train_time:5215ms step_avg:149.01ms step:46/3200 train_loss:5.8707 train_time:5366ms step_avg:149.05ms step:47/3200 train_loss:5.6608 train_time:5516ms step_avg:149.09ms step:48/3200 
train_loss:5.5273 train_time:5667ms step_avg:149.13ms step:49/3200 train_loss:5.7242 train_time:5818ms step_avg:149.17ms step:50/3200 train_loss:5.5980 train_time:5968ms step_avg:149.21ms step:51/3200 train_loss:5.7423 train_time:6118ms step_avg:149.21ms step:52/3200 train_loss:5.6138 train_time:6268ms step_avg:149.25ms step:53/3200 train_loss:5.4577 train_time:6418ms step_avg:149.26ms step:54/3200 train_loss:5.5952 train_time:6570ms step_avg:149.33ms step:55/3200 train_loss:5.4717 train_time:6720ms step_avg:149.33ms step:56/3200 train_loss:5.8091 train_time:6872ms step_avg:149.40ms step:57/3200 train_loss:5.4620 train_time:7022ms step_avg:149.41ms step:58/3200 train_loss:5.3343 train_time:7175ms step_avg:149.48ms step:59/3200 train_loss:5.4592 train_time:7326ms step_avg:149.50ms step:60/3200 train_loss:5.4275 train_time:7476ms step_avg:149.52ms step:61/3200 train_loss:5.5270 train_time:7627ms step_avg:149.54ms step:62/3200 train_loss:5.2876 train_time:7778ms step_avg:149.57ms step:63/3200 train_loss:5.3886 train_time:7929ms step_avg:149.60ms step:64/3200 train_loss:5.3747 train_time:8079ms step_avg:149.61ms step:65/3200 train_loss:5.1740 train_time:8228ms step_avg:149.61ms step:66/3200 train_loss:5.1900 train_time:8380ms step_avg:149.64ms step:67/3200 train_loss:5.3519 train_time:8533ms step_avg:149.70ms step:68/3200 train_loss:5.2156 train_time:8683ms step_avg:149.71ms step:69/3200 train_loss:5.4584 train_time:8835ms step_avg:149.74ms step:70/3200 train_loss:5.1050 train_time:8984ms step_avg:149.74ms step:71/3200 train_loss:5.1717 train_time:9135ms step_avg:149.75ms step:72/3200 train_loss:5.3368 train_time:9285ms step_avg:149.76ms step:73/3200 train_loss:5.2663 train_time:9436ms step_avg:149.78ms step:74/3200 train_loss:5.1579 train_time:9586ms step_avg:149.78ms step:75/3200 train_loss:5.2757 train_time:9736ms step_avg:149.79ms step:76/3200 train_loss:5.2511 train_time:9887ms step_avg:149.80ms step:77/3200 train_loss:5.1875 train_time:10036ms step_avg:149.80ms 
step:78/3200 train_loss:5.2877 train_time:10186ms step_avg:149.80ms step:79/3200 train_loss:5.3977 train_time:10337ms step_avg:149.81ms step:80/3200 train_loss:5.1538 train_time:10490ms step_avg:149.86ms step:81/3200 train_loss:5.2333 train_time:10639ms step_avg:149.84ms step:82/3200 train_loss:4.9945 train_time:10791ms step_avg:149.88ms step:83/3200 train_loss:5.1925 train_time:10942ms step_avg:149.89ms step:84/3200 train_loss:5.1419 train_time:11094ms step_avg:149.92ms step:85/3200 train_loss:5.1125 train_time:11245ms step_avg:149.94ms step:86/3200 train_loss:4.9786 train_time:11395ms step_avg:149.94ms step:87/3200 train_loss:5.1799 train_time:11546ms step_avg:149.94ms step:88/3200 train_loss:5.0790 train_time:11697ms step_avg:149.96ms step:89/3200 train_loss:5.1389 train_time:11849ms step_avg:149.99ms step:90/3200 train_loss:5.1111 train_time:11999ms step_avg:149.98ms step:91/3200 train_loss:5.0085 train_time:12151ms step_avg:150.01ms step:92/3200 train_loss:5.0157 train_time:12300ms step_avg:149.99ms step:93/3200 train_loss:5.1357 train_time:12451ms step_avg:150.01ms step:94/3200 train_loss:4.9647 train_time:12601ms step_avg:150.01ms step:95/3200 train_loss:4.9664 train_time:12753ms step_avg:150.04ms step:96/3200 train_loss:5.0207 train_time:12903ms step_avg:150.04ms step:97/3200 train_loss:4.9096 train_time:13055ms step_avg:150.05ms step:98/3200 train_loss:4.9869 train_time:13205ms step_avg:150.05ms step:99/3200 train_loss:4.9228 train_time:13356ms step_avg:150.07ms step:100/3200 train_loss:5.0361 train_time:13505ms step_avg:150.06ms step:101/3200 train_loss:5.0038 train_time:13657ms step_avg:150.07ms step:102/3200 train_loss:4.8875 train_time:13807ms step_avg:150.08ms step:103/3200 train_loss:5.0236 train_time:13958ms step_avg:150.09ms step:104/3200 train_loss:4.9650 train_time:14110ms step_avg:150.10ms step:105/3200 train_loss:4.8353 train_time:14259ms step_avg:150.10ms step:106/3200 train_loss:4.8858 train_time:14411ms step_avg:150.12ms step:107/3200 
train_loss:5.0716 train_time:14562ms step_avg:150.12ms step:108/3200 train_loss:4.8675 train_time:14714ms step_avg:150.15ms step:109/3200 train_loss:4.6785 train_time:14864ms step_avg:150.14ms step:110/3200 train_loss:4.8379 train_time:15015ms step_avg:150.15ms step:111/3200 train_loss:4.8302 train_time:15164ms step_avg:150.14ms step:112/3200 train_loss:4.7851 train_time:15315ms step_avg:150.15ms step:113/3200 train_loss:4.9163 train_time:15464ms step_avg:150.13ms step:114/3200 train_loss:4.8197 train_time:15616ms step_avg:150.15ms step:115/3200 train_loss:4.6718 train_time:15765ms step_avg:150.14ms step:116/3200 train_loss:4.8271 train_time:15916ms step_avg:150.15ms step:117/3200 train_loss:4.7565 train_time:16065ms step_avg:150.14ms step:118/3200 train_loss:4.6988 train_time:16216ms step_avg:150.14ms step:119/3200 train_loss:4.8537 train_time:16366ms step_avg:150.14ms step:120/3200 train_loss:4.7824 train_time:16516ms step_avg:150.14ms step:121/3200 train_loss:4.6950 train_time:16667ms step_avg:150.15ms step:122/3200 train_loss:4.6277 train_time:16817ms step_avg:150.15ms step:123/3200 train_loss:4.7534 train_time:16966ms step_avg:150.14ms step:124/3200 train_loss:4.6038 train_time:17116ms step_avg:150.14ms step:125/3200 train_loss:4.9054 train_time:17266ms step_avg:150.14ms step:125/3200 val_loss:4.7286 train_time:17312ms step_avg:150.54ms step:126/3200 train_loss:4.7703 train_time:17423ms step_avg:150.20ms step:127/3200 train_loss:4.7180 train_time:17577ms step_avg:150.23ms step:128/3200 train_loss:4.7533 train_time:17727ms step_avg:150.23ms step:129/3200 train_loss:4.6548 train_time:17877ms step_avg:150.22ms step:130/3200 train_loss:4.9446 train_time:18025ms step_avg:150.21ms step:131/3200 train_loss:4.6755 train_time:18175ms step_avg:150.20ms step:132/3200 train_loss:4.6809 train_time:18324ms step_avg:150.20ms step:133/3200 train_loss:4.6238 train_time:18479ms step_avg:150.23ms step:134/3200 train_loss:4.6993 train_time:18630ms step_avg:150.24ms step:135/3200 
train_loss:4.5588 train_time:18780ms step_avg:150.24ms step:136/3200 train_loss:4.6935 train_time:18929ms step_avg:150.23ms step:137/3200 train_loss:4.4821 train_time:19078ms step_avg:150.22ms step:138/3200 train_loss:4.6494 train_time:19227ms step_avg:150.21ms step:139/3200 train_loss:4.5675 train_time:19378ms step_avg:150.22ms step:140/3200 train_loss:4.6308 train_time:19529ms step_avg:150.22ms step:141/3200 train_loss:4.6874 train_time:19680ms step_avg:150.23ms step:142/3200 train_loss:4.5549 train_time:19831ms step_avg:150.23ms step:143/3200 train_loss:4.5731 train_time:19980ms step_avg:150.22ms step:144/3200 train_loss:4.4676 train_time:20130ms step_avg:150.22ms step:145/3200 train_loss:4.5868 train_time:20280ms step_avg:150.22ms step:146/3200 train_loss:4.5459 train_time:20431ms step_avg:150.23ms step:147/3200 train_loss:4.4238 train_time:20581ms step_avg:150.23ms step:148/3200 train_loss:4.5528 train_time:20735ms step_avg:150.25ms step:149/3200 train_loss:4.5704 train_time:20884ms step_avg:150.24ms step:150/3200 train_loss:4.5369 train_time:21036ms step_avg:150.25ms step:151/3200 train_loss:4.6383 train_time:21184ms step_avg:150.24ms step:152/3200 train_loss:4.4918 train_time:21336ms step_avg:150.26ms step:153/3200 train_loss:4.4860 train_time:21486ms step_avg:150.25ms step:154/3200 train_loss:4.5544 train_time:21636ms step_avg:150.25ms step:155/3200 train_loss:4.5386 train_time:21785ms step_avg:150.24ms step:156/3200 train_loss:4.4681 train_time:21937ms step_avg:150.25ms step:157/3200 train_loss:4.5184 train_time:22087ms step_avg:150.25ms step:158/3200 train_loss:4.6051 train_time:22237ms step_avg:150.25ms step:159/3200 train_loss:4.4251 train_time:22386ms step_avg:150.24ms step:160/3200 train_loss:4.4867 train_time:22538ms step_avg:150.25ms step:161/3200 train_loss:4.3026 train_time:22688ms step_avg:150.25ms step:162/3200 train_loss:4.5090 train_time:22838ms step_avg:150.25ms step:163/3200 train_loss:4.5164 train_time:22988ms step_avg:150.25ms 
step:164/3200 train_loss:4.4990 train_time:23139ms step_avg:150.25ms step:165/3200 train_loss:4.3586 train_time:23288ms step_avg:150.25ms step:166/3200 train_loss:4.4485 train_time:23439ms step_avg:150.25ms step:167/3200 train_loss:4.5264 train_time:23588ms step_avg:150.24ms step:168/3200 train_loss:4.3554 train_time:23739ms step_avg:150.24ms step:169/3200 train_loss:4.4386 train_time:23889ms step_avg:150.24ms step:170/3200 train_loss:4.3332 train_time:24039ms step_avg:150.24ms step:171/3200 train_loss:4.2145 train_time:24188ms step_avg:150.24ms step:172/3200 train_loss:4.3482 train_time:24338ms step_avg:150.24ms step:173/3200 train_loss:4.3624 train_time:24488ms step_avg:150.23ms step:174/3200 train_loss:4.4043 train_time:24639ms step_avg:150.24ms step:175/3200 train_loss:4.5728 train_time:24789ms step_avg:150.24ms step:176/3200 train_loss:4.4060 train_time:24940ms step_avg:150.24ms step:177/3200 train_loss:4.2608 train_time:25091ms step_avg:150.24ms step:178/3200 train_loss:4.2234 train_time:25241ms step_avg:150.24ms step:179/3200 train_loss:4.3275 train_time:25391ms step_avg:150.24ms step:180/3200 train_loss:4.2878 train_time:25540ms step_avg:150.24ms step:181/3200 train_loss:4.2594 train_time:25692ms step_avg:150.25ms step:182/3200 train_loss:4.4299 train_time:25841ms step_avg:150.24ms step:183/3200 train_loss:4.2912 train_time:25994ms step_avg:150.25ms step:184/3200 train_loss:4.2751 train_time:26143ms step_avg:150.25ms step:185/3200 train_loss:4.2680 train_time:26295ms step_avg:150.26ms step:186/3200 train_loss:4.3617 train_time:26446ms step_avg:150.26ms step:187/3200 train_loss:4.3165 train_time:26597ms step_avg:150.26ms step:188/3200 train_loss:4.3870 train_time:26746ms step_avg:150.26ms step:189/3200 train_loss:4.3146 train_time:27059ms step_avg:151.17ms step:190/3200 train_loss:4.2535 train_time:27390ms step_avg:152.17ms step:191/3200 train_loss:4.3439 train_time:27548ms step_avg:152.20ms step:192/3200 train_loss:4.2218 train_time:27697ms 
step_avg:152.18ms step:193/3200 train_loss:4.1480 train_time:27846ms step_avg:152.16ms step:194/3200 train_loss:4.3829 train_time:27995ms step_avg:152.15ms step:195/3200 train_loss:4.2886 train_time:28143ms step_avg:152.12ms step:196/3200 train_loss:4.4908 train_time:28294ms step_avg:152.12ms step:197/3200 train_loss:4.3191 train_time:28447ms step_avg:152.12ms step:198/3200 train_loss:4.1653 train_time:28599ms step_avg:152.12ms step:199/3200 train_loss:4.3028 train_time:28748ms step_avg:152.11ms step:200/3200 train_loss:4.1535 train_time:28897ms step_avg:152.09ms step:201/3200 train_loss:4.2497 train_time:29047ms step_avg:152.08ms step:202/3200 train_loss:4.1361 train_time:29197ms step_avg:152.07ms step:203/3200 train_loss:4.3664 train_time:29347ms step_avg:152.06ms step:204/3200 train_loss:4.1935 train_time:29499ms step_avg:152.06ms step:205/3200 train_loss:4.3121 train_time:29649ms step_avg:152.05ms step:206/3200 train_loss:4.3606 train_time:29799ms step_avg:152.03ms step:207/3200 train_loss:4.0663 train_time:29948ms step_avg:152.02ms step:208/3200 train_loss:4.2105 train_time:30098ms step_avg:152.01ms step:209/3200 train_loss:4.2092 train_time:30249ms step_avg:152.00ms step:210/3200 train_loss:4.3592 train_time:30400ms step_avg:152.00ms step:211/3200 train_loss:4.2945 train_time:30553ms step_avg:152.00ms step:212/3200 train_loss:4.1744 train_time:30701ms step_avg:151.99ms step:213/3200 train_loss:4.1969 train_time:30852ms step_avg:151.98ms step:214/3200 train_loss:4.1592 train_time:31001ms step_avg:151.96ms step:215/3200 train_loss:4.2355 train_time:31152ms step_avg:151.96ms step:216/3200 train_loss:4.0574 train_time:31301ms step_avg:151.95ms step:217/3200 train_loss:4.1142 train_time:31454ms step_avg:151.95ms step:218/3200 train_loss:4.1184 train_time:31604ms step_avg:151.94ms step:219/3200 train_loss:4.1935 train_time:31756ms step_avg:151.94ms step:220/3200 train_loss:4.1923 train_time:31905ms step_avg:151.93ms step:221/3200 train_loss:4.1970 
train_time:32056ms step_avg:151.93ms step:222/3200 train_loss:4.2172 train_time:32206ms step_avg:151.92ms step:223/3200 train_loss:4.1291 train_time:32356ms step_avg:151.91ms step:224/3200 train_loss:4.0935 train_time:32507ms step_avg:151.90ms step:225/3200 train_loss:4.3998 train_time:32657ms step_avg:151.89ms step:226/3200 train_loss:4.0134 train_time:32808ms step_avg:151.89ms step:227/3200 train_loss:4.0862 train_time:32958ms step_avg:151.88ms step:228/3200 train_loss:4.0966 train_time:33109ms step_avg:151.88ms step:229/3200 train_loss:4.2422 train_time:33258ms step_avg:151.86ms step:230/3200 train_loss:4.0296 train_time:33408ms step_avg:151.85ms step:231/3200 train_loss:4.1555 train_time:33558ms step_avg:151.85ms step:232/3200 train_loss:4.0073 train_time:33708ms step_avg:151.84ms step:233/3200 train_loss:4.0771 train_time:33858ms step_avg:151.83ms step:234/3200 train_loss:4.1990 train_time:34008ms step_avg:151.82ms step:235/3200 train_loss:4.1203 train_time:34159ms step_avg:151.82ms step:236/3200 train_loss:4.0029 train_time:34311ms step_avg:151.82ms step:237/3200 train_loss:4.1742 train_time:34461ms step_avg:151.81ms step:238/3200 train_loss:4.1835 train_time:34613ms step_avg:151.81ms step:239/3200 train_loss:4.0446 train_time:34763ms step_avg:151.80ms step:240/3200 train_loss:4.1819 train_time:34914ms step_avg:151.80ms step:241/3200 train_loss:4.2121 train_time:35063ms step_avg:151.79ms step:242/3200 train_loss:4.0658 train_time:35215ms step_avg:151.79ms step:243/3200 train_loss:4.2436 train_time:35365ms step_avg:151.78ms step:244/3200 train_loss:4.1158 train_time:35517ms step_avg:151.78ms step:245/3200 train_loss:4.1690 train_time:35666ms step_avg:151.77ms step:246/3200 train_loss:4.2391 train_time:35816ms step_avg:151.76ms step:247/3200 train_loss:4.1667 train_time:35966ms step_avg:151.76ms step:248/3200 train_loss:4.1084 train_time:36116ms step_avg:151.75ms step:249/3200 train_loss:4.2133 train_time:36266ms step_avg:151.74ms step:250/3200 
train_loss:4.0206 train_time:36417ms step_avg:151.74ms step:250/3200 val_loss:4.1059 train_time:36464ms step_avg:151.93ms step:251/3200 train_loss:4.0686 train_time:36572ms step_avg:151.75ms step:252/3200 train_loss:4.1732 train_time:36725ms step_avg:151.75ms step:253/3200 train_loss:4.2504 train_time:36874ms step_avg:151.74ms step:254/3200 train_loss:4.0252 train_time:37022ms step_avg:151.73ms step:255/3200 train_loss:3.9826 train_time:37170ms step_avg:151.71ms step:256/3200 train_loss:4.1659 train_time:37319ms step_avg:151.70ms step:257/3200 train_loss:4.0666 train_time:37470ms step_avg:151.70ms step:258/3200 train_loss:4.0782 train_time:37624ms step_avg:151.71ms step:259/3200 train_loss:4.0643 train_time:37777ms step_avg:151.71ms step:260/3200 train_loss:4.1188 train_time:37925ms step_avg:151.70ms step:261/3200 train_loss:4.1446 train_time:38076ms step_avg:151.70ms step:262/3200 train_loss:4.1136 train_time:38224ms step_avg:151.68ms step:263/3200 train_loss:4.0823 train_time:38374ms step_avg:151.68ms step:264/3200 train_loss:3.9903 train_time:38524ms step_avg:151.67ms step:265/3200 train_loss:4.0779 train_time:38675ms step_avg:151.67ms step:266/3200 train_loss:3.9504 train_time:38826ms step_avg:151.66ms step:267/3200 train_loss:4.0044 train_time:38978ms step_avg:151.66ms step:268/3200 train_loss:4.0123 train_time:39126ms step_avg:151.65ms step:269/3200 train_loss:4.0404 train_time:39276ms step_avg:151.64ms step:270/3200 train_loss:3.9451 train_time:39425ms step_avg:151.64ms step:271/3200 train_loss:4.1824 train_time:39578ms step_avg:151.64ms step:272/3200 train_loss:4.0731 train_time:39727ms step_avg:151.63ms step:273/3200 train_loss:4.0100 train_time:39879ms step_avg:151.63ms step:274/3200 train_loss:4.0452 train_time:40028ms step_avg:151.62ms step:275/3200 train_loss:4.1257 train_time:40179ms step_avg:151.62ms step:276/3200 train_loss:4.1468 train_time:40329ms step_avg:151.61ms step:277/3200 train_loss:4.3185 train_time:40480ms step_avg:151.61ms step:278/3200 
train_loss:4.1230 train_time:40631ms step_avg:151.61ms step:279/3200 train_loss:4.1710 train_time:40781ms step_avg:151.60ms step:280/3200 train_loss:4.0875 train_time:40931ms step_avg:151.60ms step:281/3200 train_loss:4.2404 train_time:41081ms step_avg:151.59ms step:282/3200 train_loss:4.0435 train_time:41230ms step_avg:151.58ms step:283/3200 train_loss:4.0308 train_time:41381ms step_avg:151.58ms step:284/3200 train_loss:3.9976 train_time:41531ms step_avg:151.57ms step:285/3200 train_loss:4.1318 train_time:41682ms step_avg:151.57ms step:286/3200 train_loss:4.1430 train_time:41833ms step_avg:151.57ms step:287/3200 train_loss:4.1759 train_time:41982ms step_avg:151.56ms step:288/3200 train_loss:4.0010 train_time:42132ms step_avg:151.55ms step:289/3200 train_loss:4.1102 train_time:42281ms step_avg:151.54ms step:290/3200 train_loss:3.9583 train_time:42433ms step_avg:151.54ms step:291/3200 train_loss:3.9509 train_time:42582ms step_avg:151.54ms step:292/3200 train_loss:4.0217 train_time:42732ms step_avg:151.53ms step:293/3200 train_loss:3.9538 train_time:42883ms step_avg:151.53ms step:294/3200 train_loss:3.9962 train_time:43034ms step_avg:151.53ms step:295/3200 train_loss:4.0380 train_time:43183ms step_avg:151.52ms step:296/3200 train_loss:3.9274 train_time:43334ms step_avg:151.52ms step:297/3200 train_loss:3.9468 train_time:43484ms step_avg:151.51ms step:298/3200 train_loss:3.9445 train_time:43634ms step_avg:151.51ms step:299/3200 train_loss:4.0506 train_time:43783ms step_avg:151.50ms step:300/3200 train_loss:3.9118 train_time:43933ms step_avg:151.49ms step:301/3200 train_loss:4.0517 train_time:44083ms step_avg:151.49ms step:302/3200 train_loss:4.0652 train_time:44232ms step_avg:151.48ms step:303/3200 train_loss:4.0180 train_time:44383ms step_avg:151.48ms step:304/3200 train_loss:4.0660 train_time:44532ms step_avg:151.47ms step:305/3200 train_loss:4.0429 train_time:44682ms step_avg:151.47ms step:306/3200 train_loss:4.5259 train_time:44833ms step_avg:151.46ms 
step:307/3200 train_loss:4.0166 train_time:44983ms step_avg:151.46ms step:308/3200 train_loss:3.9304 train_time:45135ms step_avg:151.46ms step:309/3200 train_loss:4.0592 train_time:45284ms step_avg:151.45ms step:310/3200 train_loss:3.9472 train_time:45435ms step_avg:151.45ms step:311/3200 train_loss:4.1650 train_time:45585ms step_avg:151.45ms step:312/3200 train_loss:4.0054 train_time:45738ms step_avg:151.45ms step:313/3200 train_loss:3.9545 train_time:45888ms step_avg:151.44ms step:314/3200 train_loss:4.0469 train_time:46039ms step_avg:151.45ms step:315/3200 train_loss:4.1632 train_time:46190ms step_avg:151.44ms step:316/3200 train_loss:4.0350 train_time:46340ms step_avg:151.44ms step:317/3200 train_loss:3.8769 train_time:46491ms step_avg:151.44ms step:318/3200 train_loss:3.9568 train_time:46641ms step_avg:151.43ms step:319/3200 train_loss:3.9965 train_time:46790ms step_avg:151.42ms step:320/3200 train_loss:3.9687 train_time:46942ms step_avg:151.43ms step:321/3200 train_loss:4.0837 train_time:47092ms step_avg:151.42ms step:322/3200 train_loss:4.0307 train_time:47241ms step_avg:151.42ms step:323/3200 train_loss:4.0079 train_time:47390ms step_avg:151.41ms step:324/3200 train_loss:4.0899 train_time:47543ms step_avg:151.41ms step:325/3200 train_loss:4.0388 train_time:47693ms step_avg:151.41ms step:326/3200 train_loss:4.1028 train_time:47843ms step_avg:151.40ms step:327/3200 train_loss:3.9704 train_time:47993ms step_avg:151.40ms step:328/3200 train_loss:4.4862 train_time:48142ms step_avg:151.39ms step:329/3200 train_loss:4.1544 train_time:48292ms step_avg:151.39ms step:330/3200 train_loss:3.8965 train_time:48442ms step_avg:151.38ms step:331/3200 train_loss:3.8433 train_time:48594ms step_avg:151.38ms step:332/3200 train_loss:4.0634 train_time:48743ms step_avg:151.37ms step:333/3200 train_loss:3.9869 train_time:48894ms step_avg:151.37ms step:334/3200 train_loss:3.9617 train_time:49044ms step_avg:151.37ms step:335/3200 train_loss:3.9267 train_time:49195ms 
step_avg:151.37ms step:336/3200 train_loss:4.0964 train_time:49343ms step_avg:151.36ms step:337/3200 train_loss:4.0402 train_time:49493ms step_avg:151.36ms step:338/3200 train_loss:4.5065 train_time:49645ms step_avg:151.36ms step:339/3200 train_loss:4.0159 train_time:49796ms step_avg:151.36ms step:340/3200 train_loss:3.9715 train_time:49946ms step_avg:151.35ms step:341/3200 train_loss:4.0147 train_time:50098ms step_avg:151.35ms step:342/3200 train_loss:3.9348 train_time:50246ms step_avg:151.34ms step:343/3200 train_loss:3.8978 train_time:50396ms step_avg:151.34ms step:344/3200 train_loss:3.9301 train_time:50546ms step_avg:151.33ms step:345/3200 train_loss:4.0711 train_time:50698ms step_avg:151.34ms step:346/3200 train_loss:3.9126 train_time:50847ms step_avg:151.33ms step:347/3200 train_loss:3.8497 train_time:50999ms step_avg:151.33ms step:348/3200 train_loss:3.8846 train_time:51147ms step_avg:151.32ms step:349/3200 train_loss:3.9471 train_time:51298ms step_avg:151.32ms step:350/3200 train_loss:3.9136 train_time:51447ms step_avg:151.32ms step:351/3200 train_loss:3.6415 train_time:51601ms step_avg:151.32ms step:352/3200 train_loss:3.9112 train_time:51750ms step_avg:151.31ms step:353/3200 train_loss:4.2493 train_time:51901ms step_avg:151.32ms step:354/3200 train_loss:3.7443 train_time:52050ms step_avg:151.31ms step:355/3200 train_loss:4.0120 train_time:52200ms step_avg:151.30ms step:356/3200 train_loss:3.8698 train_time:52349ms step_avg:151.30ms step:357/3200 train_loss:3.9767 train_time:52501ms step_avg:151.30ms step:358/3200 train_loss:3.8921 train_time:52650ms step_avg:151.29ms step:359/3200 train_loss:3.9363 train_time:52802ms step_avg:151.29ms step:360/3200 train_loss:3.9230 train_time:52951ms step_avg:151.29ms step:361/3200 train_loss:3.5254 train_time:53102ms step_avg:151.29ms step:362/3200 train_loss:4.1022 train_time:53251ms step_avg:151.28ms step:363/3200 train_loss:4.0000 train_time:53401ms step_avg:151.28ms step:364/3200 train_loss:3.9277 
train_time:53550ms step_avg:151.27ms step:365/3200 train_loss:3.8339 train_time:53701ms step_avg:151.27ms step:366/3200 train_loss:3.9997 train_time:53851ms step_avg:151.27ms step:367/3200 train_loss:3.9515 train_time:54003ms step_avg:151.27ms step:368/3200 train_loss:3.9479 train_time:54153ms step_avg:151.26ms step:369/3200 train_loss:3.9397 train_time:54303ms step_avg:151.26ms step:370/3200 train_loss:3.8311 train_time:54452ms step_avg:151.26ms step:371/3200 train_loss:3.9762 train_time:54603ms step_avg:151.26ms step:372/3200 train_loss:3.8411 train_time:54753ms step_avg:151.25ms step:373/3200 train_loss:3.7868 train_time:54903ms step_avg:151.25ms step:374/3200 train_loss:4.0012 train_time:55053ms step_avg:151.24ms step:375/3200 train_loss:3.9223 train_time:55203ms step_avg:151.24ms step:375/3200 val_loss:3.9163 train_time:55249ms step_avg:151.37ms step:376/3200 train_loss:3.8955 train_time:55359ms step_avg:151.25ms step:377/3200 train_loss:3.9528 train_time:55511ms step_avg:151.26ms step:378/3200 train_loss:3.8772 train_time:55827ms step_avg:151.70ms step:379/3200 train_loss:3.9331 train_time:55976ms step_avg:151.70ms step:380/3200 train_loss:3.9550 train_time:56297ms step_avg:152.15ms step:381/3200 train_loss:4.0360 train_time:56455ms step_avg:152.17ms step:382/3200 train_loss:3.9316 train_time:56604ms step_avg:152.16ms step:383/3200 train_loss:3.9018 train_time:56752ms step_avg:152.15ms step:384/3200 train_loss:3.8822 train_time:56901ms step_avg:152.14ms step:385/3200 train_loss:3.9597 train_time:57051ms step_avg:152.13ms step:386/3200 train_loss:3.8743 train_time:57200ms step_avg:152.13ms step:387/3200 train_loss:3.9773 train_time:57353ms step_avg:152.13ms step:388/3200 train_loss:4.1683 train_time:57504ms step_avg:152.13ms step:389/3200 train_loss:3.8893 train_time:57653ms step_avg:152.12ms step:390/3200 train_loss:3.8803 train_time:57803ms step_avg:152.11ms step:391/3200 train_loss:3.9843 train_time:57952ms step_avg:152.10ms step:392/3200 train_loss:3.9053 
train_time:58102ms step_avg:152.10ms step:393/3200 train_loss:4.0103 train_time:58252ms step_avg:152.09ms step:394/3200 train_loss:3.8548 train_time:58405ms step_avg:152.10ms step:395/3200 train_loss:3.9803 train_time:58554ms step_avg:152.09ms step:396/3200 train_loss:3.7256 train_time:58705ms step_avg:152.09ms step:397/3200 train_loss:3.9317 train_time:58853ms step_avg:152.08ms step:398/3200 train_loss:3.9600 train_time:59005ms step_avg:152.07ms step:399/3200 train_loss:3.9730 train_time:59153ms step_avg:152.06ms step:400/3200 train_loss:3.8737 train_time:59305ms step_avg:152.06ms step:401/3200 train_loss:3.9249 train_time:59455ms step_avg:152.06ms step:402/3200 train_loss:3.9998 train_time:59607ms step_avg:152.06ms step:403/3200 train_loss:3.9275 train_time:59756ms step_avg:152.05ms step:404/3200 train_loss:4.0508 train_time:59908ms step_avg:152.05ms step:405/3200 train_loss:3.7932 train_time:60057ms step_avg:152.04ms step:406/3200 train_loss:3.8910 train_time:60208ms step_avg:152.04ms step:407/3200 train_loss:4.1864 train_time:60356ms step_avg:152.03ms step:408/3200 train_loss:3.8824 train_time:60508ms step_avg:152.03ms step:409/3200 train_loss:3.9145 train_time:60658ms step_avg:152.02ms step:410/3200 train_loss:3.9566 train_time:60808ms step_avg:152.02ms step:411/3200 train_loss:3.8537 train_time:60957ms step_avg:152.01ms step:412/3200 train_loss:3.8556 train_time:61108ms step_avg:152.01ms step:413/3200 train_loss:4.2865 train_time:61258ms step_avg:152.00ms step:414/3200 train_loss:3.7213 train_time:61408ms step_avg:152.00ms step:415/3200 train_loss:4.1004 train_time:61560ms step_avg:152.00ms step:416/3200 train_loss:3.8507 train_time:61709ms step_avg:151.99ms step:417/3200 train_loss:3.8664 train_time:61859ms step_avg:151.99ms step:418/3200 train_loss:4.0480 train_time:62009ms step_avg:151.98ms step:419/3200 train_loss:3.7831 train_time:62159ms step_avg:151.98ms step:420/3200 train_loss:3.9039 train_time:62309ms step_avg:151.97ms step:421/3200 
train_loss:3.8223 train_time:62459ms step_avg:151.97ms step:422/3200 train_loss:3.7423 train_time:62609ms step_avg:151.96ms step:423/3200 train_loss:3.8752 train_time:62759ms step_avg:151.96ms step:424/3200 train_loss:3.9652 train_time:62909ms step_avg:151.95ms step:425/3200 train_loss:3.7205 train_time:63059ms step_avg:151.95ms step:426/3200 train_loss:3.9105 train_time:63209ms step_avg:151.94ms step:427/3200 train_loss:3.7841 train_time:63356ms step_avg:151.93ms step:428/3200 train_loss:3.9982 train_time:63509ms step_avg:151.93ms step:429/3200 train_loss:3.9108 train_time:63659ms step_avg:151.93ms step:430/3200 train_loss:3.8541 train_time:63809ms step_avg:151.93ms step:431/3200 train_loss:3.8248 train_time:63958ms step_avg:151.92ms step:432/3200 train_loss:3.7291 train_time:64109ms step_avg:151.92ms step:433/3200 train_loss:3.8656 train_time:64258ms step_avg:151.91ms step:434/3200 train_loss:3.9210 train_time:64408ms step_avg:151.91ms step:435/3200 train_loss:3.8689 train_time:64557ms step_avg:151.90ms step:436/3200 train_loss:3.9103 train_time:64709ms step_avg:151.90ms step:437/3200 train_loss:3.9272 train_time:64858ms step_avg:151.89ms step:438/3200 train_loss:3.8053 train_time:65010ms step_avg:151.89ms step:439/3200 train_loss:3.8183 train_time:65161ms step_avg:151.89ms step:440/3200 train_loss:3.8067 train_time:65310ms step_avg:151.88ms step:441/3200 train_loss:3.9854 train_time:65459ms step_avg:151.88ms step:442/3200 train_loss:3.8609 train_time:65609ms step_avg:151.87ms step:443/3200 train_loss:3.8538 train_time:65761ms step_avg:151.87ms step:444/3200 train_loss:3.7498 train_time:65910ms step_avg:151.87ms step:445/3200 train_loss:4.0156 train_time:66060ms step_avg:151.86ms step:446/3200 train_loss:3.9465 train_time:66210ms step_avg:151.86ms step:447/3200 train_loss:3.9381 train_time:66359ms step_avg:151.85ms step:448/3200 train_loss:3.8499 train_time:66509ms step_avg:151.85ms step:449/3200 train_loss:3.9542 train_time:66659ms step_avg:151.84ms 
step:450/3200 train_loss:3.7906 train_time:66809ms step_avg:151.84ms step:451/3200 train_loss:3.8216 train_time:66960ms step_avg:151.84ms step:452/3200 train_loss:3.6846 train_time:67109ms step_avg:151.83ms step:453/3200 train_loss:3.8083 train_time:67259ms step_avg:151.83ms step:454/3200 train_loss:3.7793 train_time:67409ms step_avg:151.82ms step:455/3200 train_loss:3.7417 train_time:67558ms step_avg:151.82ms step:456/3200 train_loss:3.9496 train_time:67708ms step_avg:151.81ms step:457/3200 train_loss:3.8329 train_time:67859ms step_avg:151.81ms step:458/3200 train_loss:3.8964 train_time:68009ms step_avg:151.81ms step:459/3200 train_loss:3.9379 train_time:68159ms step_avg:151.80ms step:460/3200 train_loss:3.7478 train_time:68309ms step_avg:151.80ms step:461/3200 train_loss:3.9044 train_time:68459ms step_avg:151.79ms step:462/3200 train_loss:3.8013 train_time:68609ms step_avg:151.79ms step:463/3200 train_loss:3.8330 train_time:68760ms step_avg:151.79ms step:464/3200 train_loss:3.8808 train_time:68910ms step_avg:151.78ms step:465/3200 train_loss:3.8195 train_time:69061ms step_avg:151.78ms step:466/3200 train_loss:3.8271 train_time:69210ms step_avg:151.78ms step:467/3200 train_loss:3.9194 train_time:69360ms step_avg:151.77ms step:468/3200 train_loss:3.9270 train_time:69510ms step_avg:151.77ms step:469/3200 train_loss:3.9066 train_time:69661ms step_avg:151.77ms step:470/3200 train_loss:3.8041 train_time:69810ms step_avg:151.76ms step:471/3200 train_loss:3.8735 train_time:69960ms step_avg:151.76ms step:472/3200 train_loss:3.9275 train_time:70110ms step_avg:151.75ms step:473/3200 train_loss:3.8769 train_time:70261ms step_avg:151.75ms step:474/3200 train_loss:3.8320 train_time:70410ms step_avg:151.75ms step:475/3200 train_loss:3.6967 train_time:70560ms step_avg:151.74ms step:476/3200 train_loss:4.1239 train_time:70710ms step_avg:151.74ms step:477/3200 train_loss:3.8733 train_time:70861ms step_avg:151.74ms step:478/3200 train_loss:3.6935 train_time:71010ms 
step_avg:151.73ms step:479/3200 train_loss:3.9237 train_time:71161ms step_avg:151.73ms step:480/3200 train_loss:3.8760 train_time:71310ms step_avg:151.72ms step:481/3200 train_loss:4.0213 train_time:71459ms step_avg:151.72ms step:482/3200 train_loss:3.8364 train_time:71610ms step_avg:151.72ms step:483/3200 train_loss:3.6368 train_time:71759ms step_avg:151.71ms step:484/3200 train_loss:3.9194 train_time:71910ms step_avg:151.71ms step:485/3200 train_loss:3.7742 train_time:72060ms step_avg:151.71ms step:486/3200 train_loss:3.7823 train_time:72211ms step_avg:151.70ms step:487/3200 train_loss:3.7146 train_time:72361ms step_avg:151.70ms step:488/3200 train_loss:3.7949 train_time:72510ms step_avg:151.70ms step:489/3200 train_loss:3.9854 train_time:72660ms step_avg:151.69ms step:490/3200 train_loss:3.8259 train_time:72810ms step_avg:151.69ms step:491/3200 train_loss:3.7109 train_time:72960ms step_avg:151.68ms step:492/3200 train_loss:3.7273 train_time:73111ms step_avg:151.68ms step:493/3200 train_loss:3.8430 train_time:73261ms step_avg:151.68ms step:494/3200 train_loss:3.6976 train_time:73410ms step_avg:151.67ms step:495/3200 train_loss:3.8234 train_time:73561ms step_avg:151.67ms step:496/3200 train_loss:3.7675 train_time:73710ms step_avg:151.67ms step:497/3200 train_loss:3.6448 train_time:73859ms step_avg:151.66ms step:498/3200 train_loss:3.8458 train_time:74009ms step_avg:151.66ms step:499/3200 train_loss:3.9134 train_time:74161ms step_avg:151.66ms step:500/3200 train_loss:3.9430 train_time:74310ms step_avg:151.65ms step:500/3200 val_loss:3.8214 train_time:74357ms step_avg:151.75ms step:501/3200 train_loss:3.8590 train_time:74465ms step_avg:151.66ms step:502/3200 train_loss:3.9145 train_time:74618ms step_avg:151.66ms step:503/3200 train_loss:3.8599 train_time:74767ms step_avg:151.66ms step:504/3200 train_loss:3.8942 train_time:74916ms step_avg:151.65ms step:505/3200 train_loss:3.8410 train_time:75064ms step_avg:151.64ms step:506/3200 train_loss:3.9313 train_time:75212ms 
step_avg:151.64ms step:507/3200 train_loss:3.7542 train_time:75361ms step_avg:151.63ms step:508/3200 train_loss:3.8824 train_time:75515ms step_avg:151.64ms step:509/3200 train_loss:3.9482 train_time:75665ms step_avg:151.63ms step:510/3200 train_loss:3.8892 train_time:75817ms step_avg:151.63ms step:511/3200 train_loss:3.6997 train_time:75967ms step_avg:151.63ms step:512/3200 train_loss:3.8995 train_time:76116ms step_avg:151.62ms step:513/3200 train_loss:3.8312 train_time:76264ms step_avg:151.62ms step:514/3200 train_loss:3.8038 train_time:76414ms step_avg:151.61ms step:515/3200 train_loss:3.8716 train_time:76563ms step_avg:151.61ms step:516/3200 train_loss:3.8529 train_time:76715ms step_avg:151.61ms step:517/3200 train_loss:4.2069 train_time:76864ms step_avg:151.61ms step:518/3200 train_loss:3.8067 train_time:77015ms step_avg:151.60ms step:519/3200 train_loss:3.9053 train_time:77163ms step_avg:151.60ms step:520/3200 train_loss:3.7900 train_time:77314ms step_avg:151.60ms step:521/3200 train_loss:3.8102 train_time:77463ms step_avg:151.59ms step:522/3200 train_loss:3.7636 train_time:77614ms step_avg:151.59ms step:523/3200 train_loss:3.7750 train_time:77763ms step_avg:151.59ms step:524/3200 train_loss:4.4095 train_time:77915ms step_avg:151.58ms step:525/3200 train_loss:3.8604 train_time:78062ms step_avg:151.58ms step:526/3200 train_loss:3.7996 train_time:78212ms step_avg:151.57ms step:527/3200 train_loss:3.8103 train_time:78360ms step_avg:151.57ms step:528/3200 train_loss:3.7727 train_time:78513ms step_avg:151.57ms step:529/3200 train_loss:3.7477 train_time:78661ms step_avg:151.56ms step:530/3200 train_loss:3.9620 train_time:78814ms step_avg:151.56ms step:531/3200 train_loss:3.7671 train_time:78962ms step_avg:151.56ms step:532/3200 train_loss:4.0353 train_time:79113ms step_avg:151.56ms step:533/3200 train_loss:3.8451 train_time:79261ms step_avg:151.55ms step:534/3200 train_loss:3.7734 train_time:79413ms step_avg:151.55ms step:535/3200 train_loss:3.8011 
train_time:79562ms step_avg:151.55ms step:536/3200 train_loss:3.7375 train_time:79716ms step_avg:151.55ms step:537/3200 train_loss:3.8643 train_time:79864ms step_avg:151.55ms step:538/3200 train_loss:3.8524 train_time:80014ms step_avg:151.54ms step:539/3200 train_loss:3.7477 train_time:80163ms step_avg:151.54ms step:540/3200 train_loss:4.2465 train_time:80312ms step_avg:151.53ms step:541/3200 train_loss:3.7881 train_time:80461ms step_avg:151.53ms step:542/3200 train_loss:3.9019 train_time:80614ms step_avg:151.53ms step:543/3200 train_loss:3.7222 train_time:80764ms step_avg:151.53ms step:544/3200 train_loss:3.7034 train_time:80915ms step_avg:151.53ms step:545/3200 train_loss:3.7807 train_time:81065ms step_avg:151.52ms step:546/3200 train_loss:3.7107 train_time:81215ms step_avg:151.52ms step:547/3200 train_loss:3.7532 train_time:81363ms step_avg:151.51ms step:548/3200 train_loss:3.7646 train_time:81514ms step_avg:151.51ms step:549/3200 train_loss:3.7411 train_time:81662ms step_avg:151.51ms step:550/3200 train_loss:3.8385 train_time:81814ms step_avg:151.51ms step:551/3200 train_loss:3.7320 train_time:81963ms step_avg:151.50ms step:552/3200 train_loss:3.7467 train_time:82114ms step_avg:151.50ms step:553/3200 train_loss:4.0692 train_time:82263ms step_avg:151.50ms step:554/3200 train_loss:3.8714 train_time:82413ms step_avg:151.49ms step:555/3200 train_loss:3.8358 train_time:82561ms step_avg:151.49ms step:556/3200 train_loss:3.7742 train_time:82713ms step_avg:151.49ms step:557/3200 train_loss:3.8132 train_time:82862ms step_avg:151.48ms step:558/3200 train_loss:3.4729 train_time:83013ms step_avg:151.48ms step:559/3200 train_loss:3.7313 train_time:83163ms step_avg:151.48ms step:560/3200 train_loss:3.7685 train_time:83314ms step_avg:151.48ms step:561/3200 train_loss:3.8213 train_time:83462ms step_avg:151.47ms step:562/3200 train_loss:3.7334 train_time:83613ms step_avg:151.47ms step:563/3200 train_loss:3.6756 train_time:83762ms step_avg:151.47ms step:564/3200 
train_loss:3.8820 train_time:83914ms step_avg:151.47ms step:565/3200 train_loss:3.6921 train_time:84062ms step_avg:151.46ms step:566/3200 train_loss:3.8027 train_time:84214ms step_avg:151.46ms step:567/3200 train_loss:3.7504 train_time:84523ms step_avg:151.75ms step:568/3200 train_loss:3.7173 train_time:84678ms step_avg:151.75ms step:569/3200 train_loss:3.7979 train_time:84827ms step_avg:151.75ms step:570/3200 train_loss:3.7764 train_time:85151ms step_avg:152.05ms step:571/3200 train_loss:3.8080 train_time:85297ms step_avg:152.04ms step:572/3200 train_loss:3.8915 train_time:85447ms step_avg:152.04ms step:573/3200 train_loss:3.8389 train_time:85594ms step_avg:152.03ms step:574/3200 train_loss:3.8510 train_time:85743ms step_avg:152.03ms step:575/3200 train_loss:3.8989 train_time:85892ms step_avg:152.02ms step:576/3200 train_loss:3.8521 train_time:86044ms step_avg:152.02ms step:577/3200 train_loss:3.8792 train_time:86196ms step_avg:152.02ms step:578/3200 train_loss:3.8050 train_time:86347ms step_avg:152.02ms step:579/3200 train_loss:3.7956 train_time:86496ms step_avg:152.01ms step:580/3200 train_loss:3.7827 train_time:86647ms step_avg:152.01ms step:581/3200 train_loss:3.7198 train_time:86795ms step_avg:152.01ms step:582/3200 train_loss:3.7526 train_time:86945ms step_avg:152.00ms step:583/3200 train_loss:3.9703 train_time:87095ms step_avg:152.00ms step:584/3200 train_loss:3.7438 train_time:87246ms step_avg:152.00ms step:585/3200 train_loss:3.7072 train_time:87396ms step_avg:151.99ms step:586/3200 train_loss:3.8976 train_time:87548ms step_avg:151.99ms step:587/3200 train_loss:3.6523 train_time:87697ms step_avg:151.99ms step:588/3200 train_loss:3.7850 train_time:87847ms step_avg:151.98ms step:589/3200 train_loss:3.7664 train_time:87996ms step_avg:151.98ms step:590/3200 train_loss:4.1124 train_time:88147ms step_avg:151.98ms step:591/3200 train_loss:3.9037 train_time:88296ms step_avg:151.97ms step:592/3200 train_loss:3.6381 train_time:88448ms step_avg:151.97ms 
step:593/3200 train_loss:3.6545 train_time:88596ms step_avg:151.97ms step:594/3200 train_loss:3.6384 train_time:88746ms step_avg:151.96ms step:595/3200 train_loss:3.6808 train_time:88896ms step_avg:151.96ms step:596/3200 train_loss:4.0473 train_time:89046ms step_avg:151.96ms step:597/3200 train_loss:3.7710 train_time:89196ms step_avg:151.95ms step:598/3200 train_loss:3.7051 train_time:89346ms step_avg:151.95ms step:599/3200 train_loss:3.7818 train_time:89496ms step_avg:151.94ms step:600/3200 train_loss:3.6001 train_time:89646ms step_avg:151.94ms step:601/3200 train_loss:3.7162 train_time:89795ms step_avg:151.94ms step:602/3200 train_loss:3.7577 train_time:89946ms step_avg:151.94ms step:603/3200 train_loss:3.7829 train_time:90096ms step_avg:151.93ms step:604/3200 train_loss:3.8996 train_time:90246ms step_avg:151.93ms step:605/3200 train_loss:3.7469 train_time:90395ms step_avg:151.92ms step:606/3200 train_loss:3.7360 train_time:90546ms step_avg:151.92ms step:607/3200 train_loss:3.6940 train_time:90696ms step_avg:151.92ms step:608/3200 train_loss:3.9416 train_time:90847ms step_avg:151.92ms step:609/3200 train_loss:3.7675 train_time:90997ms step_avg:151.91ms step:610/3200 train_loss:3.7339 train_time:91147ms step_avg:151.91ms step:611/3200 train_loss:3.8309 train_time:91295ms step_avg:151.91ms step:612/3200 train_loss:3.7345 train_time:91446ms step_avg:151.90ms step:613/3200 train_loss:3.7163 train_time:91595ms step_avg:151.90ms step:614/3200 train_loss:3.8849 train_time:91744ms step_avg:151.89ms step:615/3200 train_loss:3.8364 train_time:91895ms step_avg:151.89ms step:616/3200 train_loss:3.8114 train_time:92045ms step_avg:151.89ms step:617/3200 train_loss:3.7379 train_time:92195ms step_avg:151.89ms step:618/3200 train_loss:3.6883 train_time:92345ms step_avg:151.88ms step:619/3200 train_loss:3.7986 train_time:92496ms step_avg:151.88ms step:620/3200 train_loss:3.6920 train_time:92647ms step_avg:151.88ms step:621/3200 train_loss:3.7089 train_time:92797ms 
step_avg:151.88ms step:622/3200 train_loss:4.0267 train_time:92947ms step_avg:151.87ms step:623/3200 train_loss:3.7100 train_time:93096ms step_avg:151.87ms step:624/3200 train_loss:3.7387 train_time:93247ms step_avg:151.87ms step:625/3200 train_loss:3.8182 train_time:93397ms step_avg:151.86ms step:625/3200 val_loss:3.7484 train_time:93443ms step_avg:151.94ms step:626/3200 train_loss:3.8407 train_time:93554ms step_avg:151.87ms step:627/3200 train_loss:3.8627 train_time:93707ms step_avg:151.87ms step:628/3200 train_loss:3.8561 train_time:93855ms step_avg:151.87ms step:629/3200 train_loss:3.8898 train_time:94004ms step_avg:151.86ms step:630/3200 train_loss:3.7145 train_time:94152ms step_avg:151.86ms step:631/3200 train_loss:3.8414 train_time:94301ms step_avg:151.85ms step:632/3200 train_loss:3.8709 train_time:94449ms step_avg:151.85ms step:633/3200 train_loss:3.7752 train_time:94602ms step_avg:151.85ms step:634/3200 train_loss:3.7099 train_time:94753ms step_avg:151.85ms step:635/3200 train_loss:3.8031 train_time:94905ms step_avg:151.85ms step:636/3200 train_loss:4.0627 train_time:95054ms step_avg:151.84ms step:637/3200 train_loss:3.6583 train_time:95203ms step_avg:151.84ms step:638/3200 train_loss:3.4757 train_time:95353ms step_avg:151.84ms step:639/3200 train_loss:3.6992 train_time:95503ms step_avg:151.83ms step:640/3200 train_loss:3.7375 train_time:95653ms step_avg:151.83ms step:641/3200 train_loss:3.6870 train_time:95804ms step_avg:151.83ms step:642/3200 train_loss:3.6964 train_time:95955ms step_avg:151.83ms step:643/3200 train_loss:3.7418 train_time:96105ms step_avg:151.82ms step:644/3200 train_loss:3.7371 train_time:96254ms step_avg:151.82ms step:645/3200 train_loss:3.6758 train_time:96404ms step_avg:151.82ms step:646/3200 train_loss:3.8945 train_time:96553ms step_avg:151.81ms step:647/3200 train_loss:3.7971 train_time:96704ms step_avg:151.81ms step:648/3200 train_loss:3.7915 train_time:96855ms step_avg:151.81ms step:649/3200 train_loss:3.8217 train_time:97005ms 
step_avg:151.81ms step:650/3200 train_loss:3.8853 train_time:97156ms step_avg:151.81ms step:651/3200 train_loss:3.7469 train_time:97305ms step_avg:151.80ms step:652/3200 train_loss:3.8758 train_time:97455ms step_avg:151.80ms step:653/3200 train_loss:3.7069 train_time:97604ms step_avg:151.80ms step:654/3200 train_loss:3.7861 train_time:97754ms step_avg:151.79ms step:655/3200 train_loss:3.5520 train_time:97904ms step_avg:151.79ms step:656/3200 train_loss:3.6943 train_time:98054ms step_avg:151.79ms step:657/3200 train_loss:3.7032 train_time:98205ms step_avg:151.78ms step:658/3200 train_loss:3.6332 train_time:98354ms step_avg:151.78ms step:659/3200 train_loss:3.8062 train_time:98503ms step_avg:151.78ms step:660/3200 train_loss:3.7063 train_time:98653ms step_avg:151.77ms step:661/3200 train_loss:3.8040 train_time:98804ms step_avg:151.77ms step:662/3200 train_loss:3.8715 train_time:98954ms step_avg:151.77ms step:663/3200 train_loss:3.7863 train_time:99104ms step_avg:151.77ms step:664/3200 train_loss:3.6635 train_time:99255ms step_avg:151.77ms step:665/3200 train_loss:3.7459 train_time:99405ms step_avg:151.76ms step:666/3200 train_loss:3.6132 train_time:99555ms step_avg:151.76ms step:667/3200 train_loss:3.9018 train_time:99705ms step_avg:151.76ms step:668/3200 train_loss:3.7354 train_time:99855ms step_avg:151.76ms step:669/3200 train_loss:3.7535 train_time:100004ms step_avg:151.75ms step:670/3200 train_loss:3.6109 train_time:100154ms step_avg:151.75ms step:671/3200 train_loss:3.7279 train_time:100305ms step_avg:151.75ms step:672/3200 train_loss:3.6808 train_time:100455ms step_avg:151.74ms step:673/3200 train_loss:3.6935 train_time:100604ms step_avg:151.74ms step:674/3200 train_loss:3.9720 train_time:100755ms step_avg:151.74ms step:675/3200 train_loss:3.7532 train_time:100905ms step_avg:151.74ms step:676/3200 train_loss:3.8340 train_time:101055ms step_avg:151.73ms step:677/3200 train_loss:3.6141 train_time:101205ms step_avg:151.73ms step:678/3200 train_loss:3.7215 
train_time:101356ms step_avg:151.73ms step:679/3200 train_loss:3.6701 train_time:101505ms step_avg:151.73ms step:680/3200 train_loss:3.8115 train_time:101655ms step_avg:151.72ms step:681/3200 train_loss:3.7077 train_time:101805ms step_avg:151.72ms step:682/3200 train_loss:3.7352 train_time:101955ms step_avg:151.72ms step:683/3200 train_loss:3.8098 train_time:102105ms step_avg:151.72ms step:684/3200 train_loss:3.8460 train_time:102256ms step_avg:151.72ms step:685/3200 train_loss:3.7554 train_time:102406ms step_avg:151.71ms step:686/3200 train_loss:3.8192 train_time:102556ms step_avg:151.71ms step:687/3200 train_loss:3.7526 train_time:102706ms step_avg:151.71ms step:688/3200 train_loss:3.8017 train_time:102855ms step_avg:151.70ms step:689/3200 train_loss:3.3979 train_time:103005ms step_avg:151.70ms step:690/3200 train_loss:3.5429 train_time:103155ms step_avg:151.70ms step:691/3200 train_loss:3.6750 train_time:103305ms step_avg:151.70ms step:692/3200 train_loss:3.5516 train_time:103454ms step_avg:151.69ms step:693/3200 train_loss:3.7605 train_time:103605ms step_avg:151.69ms step:694/3200 train_loss:3.7744 train_time:103756ms step_avg:151.69ms step:695/3200 train_loss:3.6638 train_time:103906ms step_avg:151.69ms step:696/3200 train_loss:3.6624 train_time:104056ms step_avg:151.68ms step:697/3200 train_loss:3.9787 train_time:104207ms step_avg:151.68ms step:698/3200 train_loss:3.7214 train_time:104356ms step_avg:151.68ms step:699/3200 train_loss:3.7686 train_time:104506ms step_avg:151.68ms step:700/3200 train_loss:3.9248 train_time:104657ms step_avg:151.68ms step:701/3200 train_loss:3.6919 train_time:104807ms step_avg:151.67ms step:702/3200 train_loss:3.6657 train_time:104957ms step_avg:151.67ms step:703/3200 train_loss:3.6405 train_time:105108ms step_avg:151.67ms step:704/3200 train_loss:3.6039 train_time:105259ms step_avg:151.67ms step:705/3200 train_loss:3.6854 train_time:105407ms step_avg:151.67ms step:706/3200 train_loss:3.6843 train_time:105560ms step_avg:151.67ms 
step:707/3200 train_loss:3.6953 train_time:105708ms step_avg:151.66ms step:708/3200 train_loss:3.7564 train_time:105860ms step_avg:151.66ms step:709/3200 train_loss:3.7159 train_time:106009ms step_avg:151.66ms step:710/3200 train_loss:3.6975 train_time:106161ms step_avg:151.66ms step:711/3200 train_loss:3.6666 train_time:106310ms step_avg:151.66ms step:712/3200 train_loss:3.7075 train_time:106462ms step_avg:151.66ms step:713/3200 train_loss:3.7684 train_time:106611ms step_avg:151.65ms step:714/3200 train_loss:3.7759 train_time:106761ms step_avg:151.65ms step:715/3200 train_loss:3.6892 train_time:106910ms step_avg:151.65ms step:716/3200 train_loss:3.6911 train_time:107062ms step_avg:151.65ms step:717/3200 train_loss:3.7078 train_time:107211ms step_avg:151.64ms step:718/3200 train_loss:3.8592 train_time:107363ms step_avg:151.64ms step:719/3200 train_loss:3.7097 train_time:107539ms step_avg:151.68ms step:720/3200 train_loss:3.7883 train_time:107698ms step_avg:151.69ms step:721/3200 train_loss:3.9619 train_time:107857ms step_avg:151.70ms step:722/3200 train_loss:3.5833 train_time:108005ms step_avg:151.69ms step:723/3200 train_loss:3.8517 train_time:108154ms step_avg:151.69ms step:724/3200 train_loss:3.8994 train_time:108302ms step_avg:151.68ms step:725/3200 train_loss:3.6853 train_time:108451ms step_avg:151.68ms step:726/3200 train_loss:3.7683 train_time:108603ms step_avg:151.68ms step:727/3200 train_loss:3.6633 train_time:108755ms step_avg:151.68ms step:728/3200 train_loss:3.6853 train_time:108906ms step_avg:151.68ms step:729/3200 train_loss:3.8593 train_time:109056ms step_avg:151.68ms step:730/3200 train_loss:3.7989 train_time:109204ms step_avg:151.67ms step:731/3200 train_loss:3.7971 train_time:109353ms step_avg:151.67ms step:732/3200 train_loss:3.6798 train_time:109502ms step_avg:151.67ms step:733/3200 train_loss:3.7119 train_time:109652ms step_avg:151.66ms step:734/3200 train_loss:3.9449 train_time:109804ms step_avg:151.66ms step:735/3200 train_loss:3.6813 
train_time:109955ms step_avg:151.66ms step:736/3200 train_loss:3.7356 train_time:110106ms step_avg:151.66ms step:737/3200 train_loss:3.8627 train_time:110256ms step_avg:151.66ms step:738/3200 train_loss:3.7815 train_time:110405ms step_avg:151.65ms step:739/3200 train_loss:3.7258 train_time:110554ms step_avg:151.65ms step:740/3200 train_loss:3.6141 train_time:110705ms step_avg:151.65ms step:741/3200 train_loss:4.2573 train_time:110856ms step_avg:151.65ms step:742/3200 train_loss:3.6142 train_time:111006ms step_avg:151.65ms step:743/3200 train_loss:3.6987 train_time:111159ms step_avg:151.65ms step:744/3200 train_loss:3.7075 train_time:111307ms step_avg:151.65ms step:745/3200 train_loss:3.7679 train_time:111458ms step_avg:151.64ms step:746/3200 train_loss:3.7288 train_time:111606ms step_avg:151.64ms step:747/3200 train_loss:3.7168 train_time:111757ms step_avg:151.64ms step:748/3200 train_loss:3.7564 train_time:111907ms step_avg:151.64ms step:749/3200 train_loss:3.6836 train_time:112058ms step_avg:151.63ms step:750/3200 train_loss:3.6844 train_time:112207ms step_avg:151.63ms step:750/3200 val_loss:3.6920 train_time:112254ms step_avg:151.69ms step:751/3200 train_loss:3.7177 train_time:112366ms step_avg:151.64ms step:752/3200 train_loss:3.6840 train_time:112518ms step_avg:151.64ms step:753/3200 train_loss:3.7230 train_time:112668ms step_avg:151.64ms step:754/3200 train_loss:3.7400 train_time:112815ms step_avg:151.63ms step:755/3200 train_loss:3.7040 train_time:112964ms step_avg:151.63ms step:756/3200 train_loss:3.7871 train_time:113272ms step_avg:151.84ms step:757/3200 train_loss:3.6086 train_time:113429ms step_avg:151.85ms step:758/3200 train_loss:3.8459 train_time:113578ms step_avg:151.84ms step:759/3200 train_loss:3.7670 train_time:113729ms step_avg:151.84ms step:760/3200 train_loss:3.7017 train_time:114049ms step_avg:152.07ms step:761/3200 train_loss:3.8107 train_time:114198ms step_avg:152.06ms step:762/3200 train_loss:3.5268 train_time:114347ms step_avg:152.06ms 
step:763/3200 train_loss:3.6737 train_time:114495ms step_avg:152.05ms step:764/3200 train_loss:3.7850 train_time:114644ms step_avg:152.05ms step:765/3200 train_loss:3.4442 train_time:114791ms step_avg:152.04ms step:766/3200 train_loss:3.8603 train_time:114941ms step_avg:152.04ms step:767/3200 train_loss:3.7066 train_time:115097ms step_avg:152.04ms step:768/3200 train_loss:3.6783 train_time:115250ms step_avg:152.05ms step:769/3200 train_loss:3.6978 train_time:115401ms step_avg:152.04ms step:770/3200 train_loss:3.7226 train_time:115550ms step_avg:152.04ms step:771/3200 train_loss:3.7694 train_time:115700ms step_avg:152.04ms step:772/3200 train_loss:3.9995 train_time:115848ms step_avg:152.03ms step:773/3200 train_loss:3.5850 train_time:115999ms step_avg:152.03ms step:774/3200 train_loss:3.7702 train_time:116152ms step_avg:152.03ms step:775/3200 train_loss:3.7559 train_time:116303ms step_avg:152.03ms step:776/3200 train_loss:3.7296 train_time:116452ms step_avg:152.03ms step:777/3200 train_loss:3.5243 train_time:116600ms step_avg:152.02ms step:778/3200 train_loss:3.5269 train_time:116749ms step_avg:152.02ms step:779/3200 train_loss:3.6000 train_time:116898ms step_avg:152.01ms step:780/3200 train_loss:3.6871 train_time:117050ms step_avg:152.01ms step:781/3200 train_loss:3.7196 train_time:117201ms step_avg:152.01ms step:782/3200 train_loss:3.7863 train_time:117352ms step_avg:152.01ms step:783/3200 train_loss:3.7019 train_time:117502ms step_avg:152.01ms step:784/3200 train_loss:3.6860 train_time:117651ms step_avg:152.00ms step:785/3200 train_loss:3.7021 train_time:117801ms step_avg:152.00ms step:786/3200 train_loss:3.6767 train_time:117951ms step_avg:152.00ms step:787/3200 train_loss:3.5782 train_time:118101ms step_avg:152.00ms step:788/3200 train_loss:3.8250 train_time:118251ms step_avg:151.99ms step:789/3200 train_loss:3.6204 train_time:118403ms step_avg:151.99ms step:790/3200 train_loss:3.6753 train_time:118554ms step_avg:151.99ms step:791/3200 train_loss:3.7502 
train_time:118706ms step_avg:151.99ms step:792/3200 train_loss:3.8858 train_time:118854ms step_avg:151.99ms step:793/3200 train_loss:3.8845 train_time:119006ms step_avg:151.99ms step:794/3200 train_loss:3.6042 train_time:119155ms step_avg:151.98ms step:795/3200 train_loss:3.7225 train_time:119307ms step_avg:151.98ms step:796/3200 train_loss:3.7848 train_time:119455ms step_avg:151.98ms step:797/3200 train_loss:3.8937 train_time:119607ms step_avg:151.98ms step:798/3200 train_loss:3.6349 train_time:119755ms step_avg:151.97ms step:799/3200 train_loss:3.7830 train_time:119907ms step_avg:151.97ms step:800/3200 train_loss:3.6745 train_time:120055ms step_avg:151.97ms step:801/3200 train_loss:3.6613 train_time:120207ms step_avg:151.97ms step:802/3200 train_loss:3.7489 train_time:120355ms step_avg:151.96ms step:803/3200 train_loss:3.6166 train_time:120508ms step_avg:151.96ms step:804/3200 train_loss:3.6424 train_time:120657ms step_avg:151.96ms step:805/3200 train_loss:3.7546 train_time:120807ms step_avg:151.96ms step:806/3200 train_loss:3.6585 train_time:120955ms step_avg:151.95ms step:807/3200 train_loss:3.6722 train_time:121107ms step_avg:151.95ms step:808/3200 train_loss:3.7646 train_time:121256ms step_avg:151.95ms step:809/3200 train_loss:3.6844 train_time:121408ms step_avg:151.95ms step:810/3200 train_loss:3.6034 train_time:121558ms step_avg:151.95ms step:811/3200 train_loss:3.6854 train_time:121708ms step_avg:151.95ms step:812/3200 train_loss:3.7200 train_time:121858ms step_avg:151.94ms step:813/3200 train_loss:3.7157 train_time:122009ms step_avg:151.94ms step:814/3200 train_loss:3.7512 train_time:122158ms step_avg:151.94ms step:815/3200 train_loss:3.6923 train_time:122309ms step_avg:151.94ms step:816/3200 train_loss:3.6863 train_time:122459ms step_avg:151.93ms step:817/3200 train_loss:3.7872 train_time:122611ms step_avg:151.93ms step:818/3200 train_loss:3.8857 train_time:122760ms step_avg:151.93ms step:819/3200 train_loss:3.6449 train_time:122911ms step_avg:151.93ms 
step:820/3200 train_loss:3.8443 train_time:123061ms step_avg:151.93ms step:821/3200 train_loss:3.6257 train_time:123211ms step_avg:151.93ms step:822/3200 train_loss:3.6714 train_time:123360ms step_avg:151.92ms step:823/3200 train_loss:3.7983 train_time:123512ms step_avg:151.92ms step:824/3200 train_loss:3.7030 train_time:123662ms step_avg:151.92ms step:825/3200 train_loss:3.6391 train_time:123812ms step_avg:151.92ms step:826/3200 train_loss:3.7333 train_time:123961ms step_avg:151.91ms step:827/3200 train_loss:3.6259 train_time:124111ms step_avg:151.91ms step:828/3200 train_loss:3.8539 train_time:124261ms step_avg:151.91ms step:829/3200 train_loss:3.7433 train_time:124412ms step_avg:151.91ms step:830/3200 train_loss:3.7885 train_time:124562ms step_avg:151.90ms step:831/3200 train_loss:3.6593 train_time:124712ms step_avg:151.90ms step:832/3200 train_loss:3.7075 train_time:124862ms step_avg:151.90ms step:833/3200 train_loss:3.6391 train_time:125012ms step_avg:151.90ms step:834/3200 train_loss:3.7677 train_time:125162ms step_avg:151.90ms step:835/3200 train_loss:3.6032 train_time:125311ms step_avg:151.89ms step:836/3200 train_loss:3.5793 train_time:125460ms step_avg:151.89ms step:837/3200 train_loss:3.8350 train_time:125612ms step_avg:151.89ms step:838/3200 train_loss:3.5344 train_time:125761ms step_avg:151.89ms step:839/3200 train_loss:3.7114 train_time:125912ms step_avg:151.88ms step:840/3200 train_loss:3.5533 train_time:126062ms step_avg:151.88ms step:841/3200 train_loss:3.5923 train_time:126211ms step_avg:151.88ms step:842/3200 train_loss:3.6842 train_time:126360ms step_avg:151.88ms step:843/3200 train_loss:3.7028 train_time:126511ms step_avg:151.87ms step:844/3200 train_loss:3.6955 train_time:126661ms step_avg:151.87ms step:845/3200 train_loss:3.5531 train_time:126812ms step_avg:151.87ms step:846/3200 train_loss:3.7878 train_time:126962ms step_avg:151.87ms step:847/3200 train_loss:3.6470 train_time:127111ms step_avg:151.87ms step:848/3200 train_loss:3.6210 
train_time:127261ms step_avg:151.86ms step:849/3200 train_loss:3.7553 train_time:127412ms step_avg:151.86ms step:850/3200 train_loss:3.6126 train_time:127561ms step_avg:151.86ms step:851/3200 train_loss:3.5686 train_time:127711ms step_avg:151.86ms step:852/3200 train_loss:3.8573 train_time:127861ms step_avg:151.85ms step:853/3200 train_loss:3.5699 train_time:128012ms step_avg:151.85ms step:854/3200 train_loss:3.6836 train_time:128162ms step_avg:151.85ms step:855/3200 train_loss:3.7621 train_time:128311ms step_avg:151.85ms step:856/3200 train_loss:3.6419 train_time:128463ms step_avg:151.85ms step:857/3200 train_loss:3.6695 train_time:128612ms step_avg:151.84ms step:858/3200 train_loss:3.7256 train_time:128762ms step_avg:151.84ms step:859/3200 train_loss:3.6037 train_time:128913ms step_avg:151.84ms step:860/3200 train_loss:3.6871 train_time:129063ms step_avg:151.84ms step:861/3200 train_loss:3.7116 train_time:129212ms step_avg:151.84ms step:862/3200 train_loss:3.7646 train_time:129362ms step_avg:151.83ms step:863/3200 train_loss:3.7180 train_time:129512ms step_avg:151.83ms step:864/3200 train_loss:3.6991 train_time:129662ms step_avg:151.83ms step:865/3200 train_loss:3.5140 train_time:129812ms step_avg:151.83ms step:866/3200 train_loss:3.7118 train_time:129962ms step_avg:151.82ms step:867/3200 train_loss:3.9804 train_time:130112ms step_avg:151.82ms step:868/3200 train_loss:3.5774 train_time:130262ms step_avg:151.82ms step:869/3200 train_loss:3.7552 train_time:130411ms step_avg:151.82ms step:870/3200 train_loss:3.7351 train_time:130561ms step_avg:151.81ms step:871/3200 train_loss:3.5704 train_time:130712ms step_avg:151.81ms step:872/3200 train_loss:3.5486 train_time:130861ms step_avg:151.81ms step:873/3200 train_loss:3.7850 train_time:131012ms step_avg:151.81ms step:874/3200 train_loss:3.5739 train_time:131161ms step_avg:151.81ms step:875/3200 train_loss:3.3119 train_time:131311ms step_avg:151.80ms step:875/3200 val_loss:3.6453 train_time:131357ms step_avg:151.86ms 
step:876/3200 train_loss:3.7610 train_time:131468ms step_avg:151.81ms step:877/3200 train_loss:3.5669 train_time:131619ms step_avg:151.81ms step:878/3200 train_loss:3.7462 train_time:131768ms step_avg:151.81ms step:879/3200 train_loss:3.6021 train_time:131916ms step_avg:151.80ms step:880/3200 train_loss:3.7783 train_time:132065ms step_avg:151.80ms step:881/3200 train_loss:3.4459 train_time:132215ms step_avg:151.80ms step:882/3200 train_loss:3.6094 train_time:132363ms step_avg:151.79ms step:883/3200 train_loss:3.8099 train_time:132517ms step_avg:151.80ms step:884/3200 train_loss:3.9607 train_time:132669ms step_avg:151.80ms step:885/3200 train_loss:3.6871 train_time:132818ms step_avg:151.79ms step:886/3200 train_loss:3.6087 train_time:132968ms step_avg:151.79ms step:887/3200 train_loss:3.6961 train_time:133116ms step_avg:151.79ms step:888/3200 train_loss:4.2003 train_time:133265ms step_avg:151.78ms step:889/3200 train_loss:3.9548 train_time:133416ms step_avg:151.78ms step:890/3200 train_loss:3.6394 train_time:133566ms step_avg:151.78ms step:891/3200 train_loss:3.6574 train_time:133717ms step_avg:151.78ms step:892/3200 train_loss:3.4886 train_time:133867ms step_avg:151.78ms step:893/3200 train_loss:3.8331 train_time:134016ms step_avg:151.77ms step:894/3200 train_loss:3.5498 train_time:134165ms step_avg:151.77ms step:895/3200 train_loss:3.8012 train_time:134316ms step_avg:151.77ms step:896/3200 train_loss:3.8155 train_time:134465ms step_avg:151.77ms step:897/3200 train_loss:3.6188 train_time:134616ms step_avg:151.77ms step:898/3200 train_loss:3.6634 train_time:134767ms step_avg:151.76ms step:899/3200 train_loss:3.7131 train_time:134917ms step_avg:151.76ms step:900/3200 train_loss:3.6005 train_time:135067ms step_avg:151.76ms step:901/3200 train_loss:3.5411 train_time:135216ms step_avg:151.76ms step:902/3200 train_loss:3.7500 train_time:135366ms step_avg:151.76ms step:903/3200 train_loss:3.7550 train_time:135516ms step_avg:151.75ms step:904/3200 train_loss:3.6575 
train_time:135667ms step_avg:151.75ms step:905/3200 train_loss:3.6234 train_time:135818ms step_avg:151.75ms step:906/3200 train_loss:3.6123 train_time:135968ms step_avg:151.75ms step:907/3200 train_loss:3.8424 train_time:136118ms step_avg:151.75ms step:908/3200 train_loss:3.6321 train_time:136267ms step_avg:151.75ms step:909/3200 train_loss:3.6753 train_time:136417ms step_avg:151.74ms step:910/3200 train_loss:3.5788 train_time:136567ms step_avg:151.74ms step:911/3200 train_loss:3.6680 train_time:136717ms step_avg:151.74ms step:912/3200 train_loss:3.7470 train_time:136866ms step_avg:151.74ms step:913/3200 train_loss:3.7345 train_time:137017ms step_avg:151.74ms step:914/3200 train_loss:3.6073 train_time:137168ms step_avg:151.73ms step:915/3200 train_loss:3.8537 train_time:137318ms step_avg:151.73ms step:916/3200 train_loss:3.6585 train_time:137467ms step_avg:151.73ms step:917/3200 train_loss:3.7491 train_time:137616ms step_avg:151.73ms step:918/3200 train_loss:3.7165 train_time:137766ms step_avg:151.72ms step:919/3200 train_loss:4.9542 train_time:137916ms step_avg:151.72ms step:920/3200 train_loss:3.6395 train_time:138066ms step_avg:151.72ms step:921/3200 train_loss:3.6974 train_time:138216ms step_avg:151.72ms step:922/3200 train_loss:3.6584 train_time:138366ms step_avg:151.72ms step:923/3200 train_loss:3.7076 train_time:138516ms step_avg:151.72ms step:924/3200 train_loss:3.7218 train_time:138666ms step_avg:151.71ms step:925/3200 train_loss:3.8085 train_time:138817ms step_avg:151.71ms step:926/3200 train_loss:3.7739 train_time:138966ms step_avg:151.71ms step:927/3200 train_loss:3.6772 train_time:139117ms step_avg:151.71ms step:928/3200 train_loss:3.6664 train_time:139266ms step_avg:151.71ms step:929/3200 train_loss:3.8976 train_time:139416ms step_avg:151.70ms step:930/3200 train_loss:3.7346 train_time:139565ms step_avg:151.70ms step:931/3200 train_loss:3.5285 train_time:139716ms step_avg:151.70ms step:932/3200 train_loss:3.6196 train_time:139866ms step_avg:151.70ms 
step:933/3200 train_loss:3.7962 train_time:140016ms step_avg:151.70ms step:934/3200 train_loss:3.5137 train_time:140165ms step_avg:151.69ms step:935/3200 train_loss:3.6960 train_time:140316ms step_avg:151.69ms step:936/3200 train_loss:3.5692 train_time:140465ms step_avg:151.69ms step:937/3200 train_loss:3.6342 train_time:140615ms step_avg:151.69ms step:938/3200 train_loss:3.7268 train_time:140765ms step_avg:151.69ms step:939/3200 train_loss:3.6623 train_time:140916ms step_avg:151.69ms step:940/3200 train_loss:3.8192 train_time:141065ms step_avg:151.68ms step:941/3200 train_loss:3.6084 train_time:141216ms step_avg:151.68ms step:942/3200 train_loss:3.6696 train_time:141365ms step_avg:151.68ms step:943/3200 train_loss:3.4755 train_time:141516ms step_avg:151.68ms step:944/3200 train_loss:3.8232 train_time:141665ms step_avg:151.68ms step:945/3200 train_loss:3.5330 train_time:141971ms step_avg:151.84ms step:946/3200 train_loss:3.5477 train_time:142130ms step_avg:151.85ms step:947/3200 train_loss:5.1830 train_time:142278ms step_avg:151.84ms step:948/3200 train_loss:3.7266 train_time:142427ms step_avg:151.84ms step:949/3200 train_loss:3.6164 train_time:142576ms step_avg:151.84ms step:950/3200 train_loss:3.5125 train_time:142895ms step_avg:152.02ms step:951/3200 train_loss:3.5703 train_time:143054ms step_avg:152.02ms step:952/3200 train_loss:3.5248 train_time:143201ms step_avg:152.02ms step:953/3200 train_loss:3.6042 train_time:143350ms step_avg:152.01ms step:954/3200 train_loss:3.6797 train_time:143498ms step_avg:152.01ms step:955/3200 train_loss:3.5580 train_time:143646ms step_avg:152.01ms step:956/3200 train_loss:3.5947 train_time:143796ms step_avg:152.00ms step:957/3200 train_loss:3.5635 train_time:143953ms step_avg:152.01ms step:958/3200 train_loss:3.6305 train_time:144102ms step_avg:152.01ms step:959/3200 train_loss:3.6191 train_time:144254ms step_avg:152.01ms step:960/3200 train_loss:3.6337 train_time:144402ms step_avg:152.00ms step:961/3200 train_loss:3.5149 
train_time:144551ms step_avg:152.00ms step:962/3200 train_loss:3.7771 train_time:144700ms step_avg:152.00ms step:963/3200 train_loss:3.7250 train_time:144855ms step_avg:152.00ms step:964/3200 train_loss:3.6200 train_time:145007ms step_avg:152.00ms step:965/3200 train_loss:3.5703 train_time:145158ms step_avg:152.00ms step:966/3200 train_loss:3.6088 train_time:145308ms step_avg:152.00ms step:967/3200 train_loss:3.8312 train_time:145457ms step_avg:151.99ms step:968/3200 train_loss:3.6552 train_time:145606ms step_avg:151.99ms step:969/3200 train_loss:3.6389 train_time:145755ms step_avg:151.99ms step:970/3200 train_loss:3.6963 train_time:145906ms step_avg:151.99ms step:971/3200 train_loss:3.5124 train_time:146057ms step_avg:151.98ms step:972/3200 train_loss:3.6626 train_time:146209ms step_avg:151.98ms step:973/3200 train_loss:3.6167 train_time:146357ms step_avg:151.98ms step:974/3200 train_loss:3.6554 train_time:146507ms step_avg:151.98ms step:975/3200 train_loss:3.7322 train_time:146656ms step_avg:151.98ms step:976/3200 train_loss:3.6112 train_time:146805ms step_avg:151.97ms step:977/3200 train_loss:3.8060 train_time:146957ms step_avg:151.97ms step:978/3200 train_loss:3.6976 train_time:147108ms step_avg:151.97ms step:979/3200 train_loss:3.5143 train_time:147259ms step_avg:151.97ms step:980/3200 train_loss:3.8051 train_time:147409ms step_avg:151.97ms step:981/3200 train_loss:3.5391 train_time:147557ms step_avg:151.96ms step:982/3200 train_loss:3.7077 train_time:147707ms step_avg:151.96ms step:983/3200 train_loss:3.6828 train_time:147858ms step_avg:151.96ms step:984/3200 train_loss:3.6871 train_time:148010ms step_avg:151.96ms step:985/3200 train_loss:3.6319 train_time:148160ms step_avg:151.96ms step:986/3200 train_loss:3.7174 train_time:148313ms step_avg:151.96ms step:987/3200 train_loss:3.5446 train_time:148461ms step_avg:151.96ms step:988/3200 train_loss:3.6128 train_time:148611ms step_avg:151.95ms step:989/3200 train_loss:3.6114 train_time:148761ms step_avg:151.95ms 
step:990/3200 train_loss:3.5538 train_time:148913ms step_avg:151.95ms step:991/3200 train_loss:3.7729 train_time:149063ms step_avg:151.95ms step:992/3200 train_loss:3.5897 train_time:149215ms step_avg:151.95ms step:993/3200 train_loss:3.5661 train_time:149364ms step_avg:151.95ms step:994/3200 train_loss:3.6412 train_time:149515ms step_avg:151.95ms step:995/3200 train_loss:3.7237 train_time:149663ms step_avg:151.94ms step:996/3200 train_loss:3.6667 train_time:149814ms step_avg:151.94ms step:997/3200 train_loss:3.5738 train_time:149966ms step_avg:151.94ms step:998/3200 train_loss:3.9259 train_time:150117ms step_avg:151.94ms step:999/3200 train_loss:3.5894 train_time:150268ms step_avg:151.94ms step:1000/3200 train_loss:3.7181 train_time:150418ms step_avg:151.94ms step:1000/3200 val_loss:3.6074 train_time:150464ms step_avg:151.98ms step:1001/3200 train_loss:3.5843 train_time:150572ms step_avg:151.94ms step:1002/3200 train_loss:3.6330 train_time:150724ms step_avg:151.94ms step:1003/3200 train_loss:3.5107 train_time:150874ms step_avg:151.94ms step:1004/3200 train_loss:3.6997 train_time:151022ms step_avg:151.93ms step:1005/3200 train_loss:3.7499 train_time:151171ms step_avg:151.93ms step:1006/3200 train_loss:3.5281 train_time:151319ms step_avg:151.93ms step:1007/3200 train_loss:3.6119 train_time:151469ms step_avg:151.92ms step:1008/3200 train_loss:3.5751 train_time:151622ms step_avg:151.93ms step:1009/3200 train_loss:3.6961 train_time:151771ms step_avg:151.92ms step:1010/3200 train_loss:3.7923 train_time:151922ms step_avg:151.92ms step:1011/3200 train_loss:3.6944 train_time:152071ms step_avg:151.92ms step:1012/3200 train_loss:3.6568 train_time:152220ms step_avg:151.92ms step:1013/3200 train_loss:3.5160 train_time:152369ms step_avg:151.91ms step:1014/3200 train_loss:3.6571 train_time:152521ms step_avg:151.91ms step:1015/3200 train_loss:3.7624 train_time:152670ms step_avg:151.91ms step:1016/3200 train_loss:3.4825 train_time:152820ms step_avg:151.91ms step:1017/3200 
train_loss:3.5723 train_time:152969ms step_avg:151.91ms step:1018/3200 train_loss:3.5624 train_time:153120ms step_avg:151.90ms step:1019/3200 train_loss:3.5207 train_time:153269ms step_avg:151.90ms step:1020/3200 train_loss:3.6595 train_time:153420ms step_avg:151.90ms step:1021/3200 train_loss:3.5669 train_time:153568ms step_avg:151.90ms step:1022/3200 train_loss:3.5016 train_time:153720ms step_avg:151.90ms step:1023/3200 train_loss:3.6139 train_time:153869ms step_avg:151.89ms step:1024/3200 train_loss:3.6357 train_time:154020ms step_avg:151.89ms step:1025/3200 train_loss:3.6191 train_time:154168ms step_avg:151.89ms step:1026/3200 train_loss:3.6195 train_time:154320ms step_avg:151.89ms step:1027/3200 train_loss:3.7895 train_time:154469ms step_avg:151.89ms step:1028/3200 train_loss:3.4672 train_time:154620ms step_avg:151.89ms step:1029/3200 train_loss:3.5340 train_time:154768ms step_avg:151.88ms step:1030/3200 train_loss:3.4878 train_time:154920ms step_avg:151.88ms step:1031/3200 train_loss:3.6604 train_time:155068ms step_avg:151.88ms step:1032/3200 train_loss:3.6380 train_time:155219ms step_avg:151.88ms step:1033/3200 train_loss:3.8190 train_time:155368ms step_avg:151.88ms step:1034/3200 train_loss:3.6308 train_time:155520ms step_avg:151.87ms step:1035/3200 train_loss:3.5553 train_time:155668ms step_avg:151.87ms step:1036/3200 train_loss:3.5730 train_time:155819ms step_avg:151.87ms step:1037/3200 train_loss:3.6316 train_time:155968ms step_avg:151.87ms step:1038/3200 train_loss:3.9404 train_time:156118ms step_avg:151.87ms step:1039/3200 train_loss:3.7631 train_time:156266ms step_avg:151.86ms step:1040/3200 train_loss:3.6541 train_time:156419ms step_avg:151.86ms step:1041/3200 train_loss:3.5581 train_time:156568ms step_avg:151.86ms step:1042/3200 train_loss:3.6218 train_time:156719ms step_avg:151.86ms step:1043/3200 train_loss:3.6626 train_time:156869ms step_avg:151.86ms step:1044/3200 train_loss:3.5919 train_time:157019ms step_avg:151.86ms step:1045/3200 
train_loss:3.5993 train_time:157167ms step_avg:151.85ms step:1046/3200 train_loss:3.6764 train_time:157318ms step_avg:151.85ms step:1047/3200 train_loss:3.5799 train_time:157468ms step_avg:151.85ms step:1048/3200 train_loss:3.7803 train_time:157618ms step_avg:151.85ms step:1049/3200 train_loss:3.6410 train_time:157768ms step_avg:151.85ms step:1050/3200 train_loss:3.5613 train_time:157920ms step_avg:151.85ms step:1051/3200 train_loss:3.5285 train_time:158068ms step_avg:151.84ms step:1052/3200 train_loss:3.6536 train_time:158219ms step_avg:151.84ms step:1053/3200 train_loss:3.5254 train_time:158366ms step_avg:151.84ms step:1054/3200 train_loss:3.8522 train_time:158518ms step_avg:151.84ms step:1055/3200 train_loss:3.6921 train_time:158667ms step_avg:151.83ms step:1056/3200 train_loss:3.5450 train_time:158819ms step_avg:151.83ms step:1057/3200 train_loss:3.6487 train_time:158968ms step_avg:151.83ms step:1058/3200 train_loss:3.7226 train_time:159119ms step_avg:151.83ms step:1059/3200 train_loss:3.4401 train_time:159267ms step_avg:151.83ms step:1060/3200 train_loss:3.5688 train_time:159419ms step_avg:151.83ms step:1061/3200 train_loss:3.5933 train_time:159568ms step_avg:151.82ms step:1062/3200 train_loss:3.5557 train_time:159719ms step_avg:151.82ms step:1063/3200 train_loss:3.5331 train_time:159869ms step_avg:151.82ms step:1064/3200 train_loss:3.6301 train_time:160021ms step_avg:151.82ms step:1065/3200 train_loss:3.5307 train_time:160170ms step_avg:151.82ms step:1066/3200 train_loss:3.5247 train_time:160320ms step_avg:151.82ms step:1067/3200 train_loss:3.5467 train_time:160467ms step_avg:151.81ms step:1068/3200 train_loss:3.4579 train_time:160618ms step_avg:151.81ms step:1069/3200 train_loss:3.5694 train_time:160768ms step_avg:151.81ms step:1070/3200 train_loss:3.4405 train_time:160920ms step_avg:151.81ms step:1071/3200 train_loss:3.7004 train_time:161070ms step_avg:151.81ms step:1072/3200 train_loss:3.6516 train_time:161220ms step_avg:151.81ms step:1073/3200 
train_loss:3.6028 train_time:161368ms step_avg:151.80ms step:1074/3200 train_loss:3.6669 train_time:161519ms step_avg:151.80ms step:1075/3200 train_loss:3.6060 train_time:161667ms step_avg:151.80ms step:1076/3200 train_loss:3.5503 train_time:161818ms step_avg:151.80ms step:1077/3200 train_loss:3.9411 train_time:161968ms step_avg:151.80ms step:1078/3200 train_loss:3.6119 train_time:162120ms step_avg:151.80ms step:1079/3200 train_loss:3.3053 train_time:162269ms step_avg:151.80ms step:1080/3200 train_loss:3.6848 train_time:162419ms step_avg:151.79ms step:1081/3200 train_loss:3.5945 train_time:162567ms step_avg:151.79ms step:1082/3200 train_loss:3.6595 train_time:162719ms step_avg:151.79ms step:1083/3200 train_loss:3.7631 train_time:162868ms step_avg:151.79ms step:1084/3200 train_loss:3.6563 train_time:163019ms step_avg:151.79ms step:1085/3200 train_loss:3.6310 train_time:163170ms step_avg:151.79ms step:1086/3200 train_loss:3.5912 train_time:163321ms step_avg:151.78ms step:1087/3200 train_loss:3.7863 train_time:163468ms step_avg:151.78ms step:1088/3200 train_loss:3.6740 train_time:163620ms step_avg:151.78ms step:1089/3200 train_loss:3.5125 train_time:163768ms step_avg:151.78ms step:1090/3200 train_loss:3.5340 train_time:163920ms step_avg:151.78ms step:1091/3200 train_loss:3.6484 train_time:164069ms step_avg:151.77ms step:1092/3200 train_loss:3.4516 train_time:164220ms step_avg:151.77ms step:1093/3200 train_loss:3.6468 train_time:164369ms step_avg:151.77ms step:1094/3200 train_loss:3.7824 train_time:164519ms step_avg:151.77ms step:1095/3200 train_loss:3.6162 train_time:164667ms step_avg:151.77ms step:1096/3200 train_loss:3.5669 train_time:164820ms step_avg:151.77ms step:1097/3200 train_loss:3.5915 train_time:164969ms step_avg:151.77ms step:1098/3200 train_loss:3.6396 train_time:165119ms step_avg:151.76ms step:1099/3200 train_loss:3.7140 train_time:165267ms step_avg:151.76ms step:1100/3200 train_loss:3.6686 train_time:165419ms step_avg:151.76ms step:1101/3200 
train_loss:3.5923 train_time:165567ms step_avg:151.76ms step:1102/3200 train_loss:3.4519 train_time:165718ms step_avg:151.76ms step:1103/3200 train_loss:3.5246 train_time:165867ms step_avg:151.75ms step:1104/3200 train_loss:3.6077 train_time:166020ms step_avg:151.75ms step:1105/3200 train_loss:3.4822 train_time:166168ms step_avg:151.75ms step:1106/3200 train_loss:4.2376 train_time:166319ms step_avg:151.75ms step:1107/3200 train_loss:3.3855 train_time:166468ms step_avg:151.75ms step:1108/3200 train_loss:3.7320 train_time:166619ms step_avg:151.75ms step:1109/3200 train_loss:3.5092 train_time:166767ms step_avg:151.74ms step:1110/3200 train_loss:3.6573 train_time:166919ms step_avg:151.74ms step:1111/3200 train_loss:3.5929 train_time:167068ms step_avg:151.74ms step:1112/3200 train_loss:3.6349 train_time:167220ms step_avg:151.74ms step:1113/3200 train_loss:3.7276 train_time:167368ms step_avg:151.74ms step:1114/3200 train_loss:3.5857 train_time:167519ms step_avg:151.74ms step:1115/3200 train_loss:3.5279 train_time:167667ms step_avg:151.74ms step:1116/3200 train_loss:3.4277 train_time:167817ms step_avg:151.73ms step:1117/3200 train_loss:3.5998 train_time:167967ms step_avg:151.73ms step:1118/3200 train_loss:3.7496 train_time:168119ms step_avg:151.73ms step:1119/3200 train_loss:3.7850 train_time:168268ms step_avg:151.73ms step:1120/3200 train_loss:3.6215 train_time:168420ms step_avg:151.73ms step:1121/3200 train_loss:3.6510 train_time:168568ms step_avg:151.73ms step:1122/3200 train_loss:3.5486 train_time:168719ms step_avg:151.73ms step:1123/3200 train_loss:3.6127 train_time:168867ms step_avg:151.72ms step:1124/3200 train_loss:3.7485 train_time:169019ms step_avg:151.72ms step:1125/3200 train_loss:3.5183 train_time:169167ms step_avg:151.72ms step:1125/3200 val_loss:3.5799 train_time:169215ms step_avg:151.76ms step:1126/3200 train_loss:3.4219 train_time:169329ms step_avg:151.73ms step:1127/3200 train_loss:3.6382 train_time:169480ms step_avg:151.73ms step:1128/3200 
train_loss:3.8593 train_time:169628ms step_avg:151.72ms step:1129/3200 train_loss:3.3990 train_time:169776ms step_avg:151.72ms step:1130/3200 train_loss:3.7182 train_time:169926ms step_avg:151.72ms step:1131/3200 train_loss:3.5475 train_time:170075ms step_avg:151.72ms step:1132/3200 train_loss:3.5846 train_time:170225ms step_avg:151.72ms step:1133/3200 train_loss:3.5301 train_time:170379ms step_avg:151.72ms step:1134/3200 train_loss:3.6932 train_time:170688ms step_avg:151.86ms step:1135/3200 train_loss:3.6229 train_time:170847ms step_avg:151.86ms step:1136/3200 train_loss:3.6753 train_time:170996ms step_avg:151.86ms step:1137/3200 train_loss:3.7082 train_time:171146ms step_avg:151.86ms step:1138/3200 train_loss:3.6221 train_time:171294ms step_avg:151.86ms step:1139/3200 train_loss:3.5267 train_time:171442ms step_avg:151.85ms step:1140/3200 train_loss:3.8247 train_time:171762ms step_avg:152.00ms step:1141/3200 train_loss:3.6345 train_time:171920ms step_avg:152.01ms step:1142/3200 train_loss:3.7374 train_time:172068ms step_avg:152.00ms step:1143/3200 train_loss:3.6161 train_time:172216ms step_avg:152.00ms step:1144/3200 train_loss:3.5310 train_time:172366ms step_avg:152.00ms step:1145/3200 train_loss:3.6335 train_time:172513ms step_avg:151.99ms step:1146/3200 train_loss:3.7535 train_time:172664ms step_avg:151.99ms step:1147/3200 train_loss:3.7304 train_time:172818ms step_avg:152.00ms step:1148/3200 train_loss:3.6820 train_time:172969ms step_avg:151.99ms step:1149/3200 train_loss:3.6650 train_time:173117ms step_avg:151.99ms step:1150/3200 train_loss:3.5159 train_time:173266ms step_avg:151.99ms step:1151/3200 train_loss:3.5385 train_time:173414ms step_avg:151.98ms step:1152/3200 train_loss:3.5031 train_time:173565ms step_avg:151.98ms step:1153/3200 train_loss:3.6546 train_time:173714ms step_avg:151.98ms step:1154/3200 train_loss:3.6288 train_time:173867ms step_avg:151.98ms step:1155/3200 train_loss:3.6852 train_time:174018ms step_avg:151.98ms step:1156/3200 
train_loss:3.5347 train_time:174168ms step_avg:151.98ms step:1157/3200 train_loss:3.6987 train_time:174317ms step_avg:151.98ms step:1158/3200 train_loss:3.6555 train_time:174467ms step_avg:151.98ms step:1159/3200 train_loss:3.4676 train_time:174616ms step_avg:151.97ms step:1160/3200 train_loss:3.5174 train_time:174768ms step_avg:151.97ms step:1161/3200 train_loss:3.4950 train_time:174918ms step_avg:151.97ms step:1162/3200 train_loss:3.3224 train_time:175069ms step_avg:151.97ms step:1163/3200 train_loss:3.6065 train_time:175218ms step_avg:151.97ms step:1164/3200 train_loss:3.5772 train_time:175368ms step_avg:151.97ms step:1165/3200 train_loss:3.4534 train_time:175517ms step_avg:151.96ms step:1166/3200 train_loss:3.4374 train_time:175668ms step_avg:151.96ms step:1167/3200 train_loss:3.5483 train_time:175816ms step_avg:151.96ms step:1168/3200 train_loss:3.5617 train_time:175968ms step_avg:151.96ms step:1169/3200 train_loss:3.8791 train_time:176118ms step_avg:151.96ms step:1170/3200 train_loss:3.5604 train_time:176268ms step_avg:151.95ms step:1171/3200 train_loss:3.5686 train_time:176416ms step_avg:151.95ms step:1172/3200 train_loss:3.4855 train_time:176568ms step_avg:151.95ms step:1173/3200 train_loss:3.5809 train_time:176717ms step_avg:151.95ms step:1174/3200 train_loss:3.7143 train_time:176869ms step_avg:151.95ms step:1175/3200 train_loss:3.5572 train_time:177018ms step_avg:151.95ms step:1176/3200 train_loss:3.5706 train_time:177169ms step_avg:151.95ms step:1177/3200 train_loss:3.6219 train_time:177318ms step_avg:151.94ms step:1178/3200 train_loss:3.6072 train_time:177469ms step_avg:151.94ms step:1179/3200 train_loss:3.6663 train_time:177617ms step_avg:151.94ms step:1180/3200 train_loss:3.5735 train_time:177769ms step_avg:151.94ms step:1181/3200 train_loss:3.5703 train_time:177919ms step_avg:151.94ms step:1182/3200 train_loss:3.5192 train_time:178070ms step_avg:151.94ms step:1183/3200 train_loss:3.5798 train_time:178220ms step_avg:151.94ms step:1184/3200 
train_loss:3.4996 train_time:178371ms step_avg:151.93ms step:1185/3200 train_loss:3.6704 train_time:178521ms step_avg:151.93ms step:1186/3200 train_loss:3.7368 train_time:178671ms step_avg:151.93ms step:1187/3200 train_loss:3.5340 train_time:178820ms step_avg:151.93ms step:1188/3200 train_loss:3.5875 train_time:178969ms step_avg:151.93ms step:1189/3200 train_loss:3.6096 train_time:179119ms step_avg:151.92ms step:1190/3200 train_loss:3.4477 train_time:179268ms step_avg:151.92ms step:1191/3200 train_loss:3.6240 train_time:179418ms step_avg:151.92ms step:1192/3200 train_loss:3.7705 train_time:179569ms step_avg:151.92ms step:1193/3200 train_loss:3.5683 train_time:179719ms step_avg:151.92ms step:1194/3200 train_loss:3.4528 train_time:179870ms step_avg:151.92ms step:1195/3200 train_loss:3.7403 train_time:180018ms step_avg:151.91ms step:1196/3200 train_loss:3.5520 train_time:180169ms step_avg:151.91ms step:1197/3200 train_loss:3.5634 train_time:180319ms step_avg:151.91ms step:1198/3200 train_loss:3.4561 train_time:180469ms step_avg:151.91ms step:1199/3200 train_loss:3.4697 train_time:180620ms step_avg:151.91ms step:1200/3200 train_loss:3.5233 train_time:180770ms step_avg:151.91ms step:1201/3200 train_loss:3.6100 train_time:180918ms step_avg:151.90ms step:1202/3200 train_loss:3.6736 train_time:181070ms step_avg:151.90ms step:1203/3200 train_loss:3.7155 train_time:181220ms step_avg:151.90ms step:1204/3200 train_loss:3.5882 train_time:181370ms step_avg:151.90ms step:1205/3200 train_loss:3.5116 train_time:181519ms step_avg:151.90ms step:1206/3200 train_loss:3.6049 train_time:181669ms step_avg:151.90ms step:1207/3200 train_loss:3.6539 train_time:181818ms step_avg:151.89ms step:1208/3200 train_loss:3.6959 train_time:181969ms step_avg:151.89ms step:1209/3200 train_loss:3.5768 train_time:182119ms step_avg:151.89ms step:1210/3200 train_loss:3.4393 train_time:182269ms step_avg:151.89ms step:1211/3200 train_loss:3.4851 train_time:182419ms step_avg:151.89ms step:1212/3200 
train_loss:3.5778 train_time:182569ms step_avg:151.89ms step:1213/3200 train_loss:3.5982 train_time:182719ms step_avg:151.89ms step:1214/3200 train_loss:3.6300 train_time:182870ms step_avg:151.88ms step:1215/3200 train_loss:3.5124 train_time:183020ms step_avg:151.88ms step:1216/3200 train_loss:3.5792 train_time:183170ms step_avg:151.88ms step:1217/3200 train_loss:3.5208 train_time:183320ms step_avg:151.88ms step:1218/3200 train_loss:3.5045 train_time:183470ms step_avg:151.88ms step:1219/3200 train_loss:3.6061 train_time:183619ms step_avg:151.88ms step:1220/3200 train_loss:3.4567 train_time:183768ms step_avg:151.87ms step:1221/3200 train_loss:3.6766 train_time:183917ms step_avg:151.87ms step:1222/3200 train_loss:3.6996 train_time:184069ms step_avg:151.87ms step:1223/3200 train_loss:3.6243 train_time:184218ms step_avg:151.87ms step:1224/3200 train_loss:3.4822 train_time:184369ms step_avg:151.87ms step:1225/3200 train_loss:3.4584 train_time:184519ms step_avg:151.87ms step:1226/3200 train_loss:3.5481 train_time:184668ms step_avg:151.87ms step:1227/3200 train_loss:3.5320 train_time:184818ms step_avg:151.86ms step:1228/3200 train_loss:3.4740 train_time:184969ms step_avg:151.86ms step:1229/3200 train_loss:3.6338 train_time:185119ms step_avg:151.86ms step:1230/3200 train_loss:3.5637 train_time:185270ms step_avg:151.86ms step:1231/3200 train_loss:3.6192 train_time:185419ms step_avg:151.86ms step:1232/3200 train_loss:3.7706 train_time:185569ms step_avg:151.86ms step:1233/3200 train_loss:3.6752 train_time:185717ms step_avg:151.85ms step:1234/3200 train_loss:3.6129 train_time:185869ms step_avg:151.85ms step:1235/3200 train_loss:3.7664 train_time:186018ms step_avg:151.85ms step:1236/3200 train_loss:3.5220 train_time:186169ms step_avg:151.85ms step:1237/3200 train_loss:3.4922 train_time:186320ms step_avg:151.85ms step:1238/3200 train_loss:3.4447 train_time:186469ms step_avg:151.85ms step:1239/3200 train_loss:3.5197 train_time:186619ms step_avg:151.85ms step:1240/3200 
train_loss:3.5230 train_time:186769ms step_avg:151.84ms step:1241/3200 train_loss:3.5765 train_time:186918ms step_avg:151.84ms step:1242/3200 train_loss:3.6168 train_time:187068ms step_avg:151.84ms step:1243/3200 train_loss:3.4929 train_time:187218ms step_avg:151.84ms step:1244/3200 train_loss:3.5903 train_time:187369ms step_avg:151.84ms step:1245/3200 train_loss:3.6055 train_time:187520ms step_avg:151.84ms step:1246/3200 train_loss:3.6044 train_time:187669ms step_avg:151.84ms step:1247/3200 train_loss:3.4290 train_time:187819ms step_avg:151.83ms step:1248/3200 train_loss:3.5712 train_time:187969ms step_avg:151.83ms step:1249/3200 train_loss:3.6290 train_time:188119ms step_avg:151.83ms step:1250/3200 train_loss:3.5959 train_time:188269ms step_avg:151.83ms step:1250/3200 val_loss:3.5517 train_time:188314ms step_avg:151.87ms step:1251/3200 train_loss:3.4944 train_time:188426ms step_avg:151.83ms step:1252/3200 train_loss:3.6993 train_time:188579ms step_avg:151.84ms step:1253/3200 train_loss:3.5696 train_time:188728ms step_avg:151.83ms step:1254/3200 train_loss:3.5050 train_time:188878ms step_avg:151.83ms step:1255/3200 train_loss:3.6340 train_time:189025ms step_avg:151.83ms step:1256/3200 train_loss:3.6988 train_time:189175ms step_avg:151.83ms step:1257/3200 train_loss:3.5097 train_time:189323ms step_avg:151.82ms step:1258/3200 train_loss:3.5450 train_time:189477ms step_avg:151.82ms step:1259/3200 train_loss:3.5651 train_time:189627ms step_avg:151.82ms step:1260/3200 train_loss:3.5360 train_time:189779ms step_avg:151.82ms step:1261/3200 train_loss:3.3980 train_time:189928ms step_avg:151.82ms step:1262/3200 train_loss:3.5038 train_time:190078ms step_avg:151.82ms step:1263/3200 train_loss:3.5614 train_time:190227ms step_avg:151.82ms step:1264/3200 train_loss:3.4166 train_time:190377ms step_avg:151.82ms step:1265/3200 train_loss:3.6265 train_time:190526ms step_avg:151.81ms step:1266/3200 train_loss:3.6133 train_time:190679ms step_avg:151.81ms step:1267/3200 
train_loss:3.6193 train_time:190829ms step_avg:151.81ms step:1268/3200 train_loss:3.5611 train_time:190979ms step_avg:151.81ms step:1269/3200 train_loss:3.5999 train_time:191127ms step_avg:151.81ms step:1270/3200 train_loss:3.4520 train_time:191279ms step_avg:151.81ms step:1271/3200 train_loss:3.3033 train_time:191427ms step_avg:151.81ms step:1272/3200 train_loss:3.5858 train_time:191579ms step_avg:151.81ms step:1273/3200 train_loss:3.5439 train_time:191727ms step_avg:151.80ms step:1274/3200 train_loss:3.6020 train_time:191880ms step_avg:151.80ms step:1275/3200 train_loss:3.5474 train_time:192029ms step_avg:151.80ms step:1276/3200 train_loss:3.6385 train_time:192179ms step_avg:151.80ms step:1277/3200 train_loss:3.6624 train_time:192328ms step_avg:151.80ms step:1278/3200 train_loss:3.6210 train_time:192480ms step_avg:151.80ms step:1279/3200 train_loss:3.6133 train_time:192627ms step_avg:151.79ms step:1280/3200 train_loss:3.4481 train_time:192779ms step_avg:151.79ms step:1281/3200 train_loss:3.5618 train_time:192927ms step_avg:151.79ms step:1282/3200 train_loss:3.6252 train_time:193079ms step_avg:151.79ms step:1283/3200 train_loss:3.6613 train_time:193227ms step_avg:151.79ms step:1284/3200 train_loss:3.5557 train_time:193378ms step_avg:151.79ms step:1285/3200 train_loss:3.5709 train_time:193527ms step_avg:151.79ms step:1286/3200 train_loss:3.5563 train_time:193678ms step_avg:151.79ms step:1287/3200 train_loss:3.5355 train_time:193827ms step_avg:151.78ms step:1288/3200 train_loss:3.6673 train_time:193979ms step_avg:151.78ms step:1289/3200 train_loss:3.5031 train_time:194127ms step_avg:151.78ms step:1290/3200 train_loss:3.5902 train_time:194278ms step_avg:151.78ms step:1291/3200 train_loss:3.6613 train_time:194427ms step_avg:151.78ms step:1292/3200 train_loss:3.5889 train_time:194579ms step_avg:151.78ms step:1293/3200 train_loss:3.6848 train_time:194727ms step_avg:151.77ms step:1294/3200 train_loss:3.7047 train_time:194879ms step_avg:151.77ms step:1295/3200 
train_loss:3.6788 train_time:195028ms step_avg:151.77ms step:1296/3200 train_loss:3.4817 train_time:195179ms step_avg:151.77ms step:1297/3200 train_loss:3.5571 train_time:195328ms step_avg:151.77ms step:1298/3200 train_loss:3.4590 train_time:195479ms step_avg:151.77ms step:1299/3200 train_loss:3.5253 train_time:195627ms step_avg:151.77ms step:1300/3200 train_loss:3.6005 train_time:195778ms step_avg:151.77ms step:1301/3200 train_loss:3.6033 train_time:195927ms step_avg:151.76ms step:1302/3200 train_loss:3.6054 train_time:196078ms step_avg:151.76ms step:1303/3200 train_loss:3.7645 train_time:196228ms step_avg:151.76ms step:1304/3200 train_loss:3.5388 train_time:196379ms step_avg:151.76ms step:1305/3200 train_loss:3.7319 train_time:196528ms step_avg:151.76ms step:1306/3200 train_loss:3.4663 train_time:196679ms step_avg:151.76ms step:1307/3200 train_loss:3.6584 train_time:196828ms step_avg:151.76ms step:1308/3200 train_loss:3.6559 train_time:196978ms step_avg:151.76ms step:1309/3200 train_loss:3.5192 train_time:197127ms step_avg:151.75ms step:1310/3200 train_loss:3.4994 train_time:197280ms step_avg:151.75ms step:1311/3200 train_loss:3.5379 train_time:197431ms step_avg:151.75ms step:1312/3200 train_loss:3.4919 train_time:197581ms step_avg:151.75ms step:1313/3200 train_loss:3.6031 train_time:197730ms step_avg:151.75ms step:1314/3200 train_loss:3.5488 train_time:197880ms step_avg:151.75ms step:1315/3200 train_loss:3.2693 train_time:198030ms step_avg:151.75ms step:1316/3200 train_loss:3.4967 train_time:198181ms step_avg:151.75ms step:1317/3200 train_loss:3.5783 train_time:198330ms step_avg:151.74ms step:1318/3200 train_loss:3.6096 train_time:198481ms step_avg:151.74ms step:1319/3200 train_loss:3.4838 train_time:198632ms step_avg:151.74ms step:1320/3200 train_loss:3.6238 train_time:198782ms step_avg:151.74ms step:1321/3200 train_loss:3.6785 train_time:198931ms step_avg:151.74ms step:1322/3200 train_loss:3.5664 train_time:199080ms step_avg:151.74ms step:1323/3200 
train_loss:3.5124 train_time:199393ms step_avg:151.86ms step:1324/3200 train_loss:3.5403 train_time:199546ms step_avg:151.86ms step:1325/3200 train_loss:3.6395 train_time:199695ms step_avg:151.86ms step:1326/3200 train_loss:3.6922 train_time:199843ms step_avg:151.86ms step:1327/3200 train_loss:3.4440 train_time:199992ms step_avg:151.85ms step:1328/3200 train_loss:3.3698 train_time:200141ms step_avg:151.85ms step:1329/3200 train_loss:3.6806 train_time:200292ms step_avg:151.85ms step:1330/3200 train_loss:3.5160 train_time:200608ms step_avg:151.98ms step:1331/3200 train_loss:3.6558 train_time:200767ms step_avg:151.98ms step:1332/3200 train_loss:3.5519 train_time:200916ms step_avg:151.98ms step:1333/3200 train_loss:3.9607 train_time:201064ms step_avg:151.98ms step:1334/3200 train_loss:3.6559 train_time:201214ms step_avg:151.97ms step:1335/3200 train_loss:3.5709 train_time:201361ms step_avg:151.97ms step:1336/3200 train_loss:3.5032 train_time:201511ms step_avg:151.97ms step:1337/3200 train_loss:3.4998 train_time:201666ms step_avg:151.97ms step:1338/3200 train_loss:3.7633 train_time:201817ms step_avg:151.97ms step:1339/3200 train_loss:3.7046 train_time:201965ms step_avg:151.97ms step:1340/3200 train_loss:3.5438 train_time:202117ms step_avg:151.97ms step:1341/3200 train_loss:3.5034 train_time:202265ms step_avg:151.96ms step:1342/3200 train_loss:3.7999 train_time:202417ms step_avg:151.96ms step:1343/3200 train_loss:3.5749 train_time:202567ms step_avg:151.96ms step:1344/3200 train_loss:3.5702 train_time:202720ms step_avg:151.96ms step:1345/3200 train_loss:3.6277 train_time:202869ms step_avg:151.96ms step:1346/3200 train_loss:3.5964 train_time:203021ms step_avg:151.96ms step:1347/3200 train_loss:3.4939 train_time:203170ms step_avg:151.96ms step:1348/3200 train_loss:3.4541 train_time:203320ms step_avg:151.96ms step:1349/3200 train_loss:3.5518 train_time:203469ms step_avg:151.96ms step:1350/3200 train_loss:3.4757 train_time:203621ms step_avg:151.96ms step:1351/3200 
train_loss:3.6096 train_time:203770ms step_avg:151.95ms step:1352/3200 train_loss:3.4552 train_time:203921ms step_avg:151.95ms step:1353/3200 train_loss:3.5218 train_time:204070ms step_avg:151.95ms step:1354/3200 train_loss:3.6186 train_time:204220ms step_avg:151.95ms step:1355/3200 train_loss:3.4645 train_time:204369ms step_avg:151.95ms step:1356/3200 train_loss:3.3846 train_time:204520ms step_avg:151.95ms step:1357/3200 train_loss:3.7301 train_time:204670ms step_avg:151.94ms step:1358/3200 train_loss:3.6589 train_time:204821ms step_avg:151.94ms step:1359/3200 train_loss:3.3833 train_time:204970ms step_avg:151.94ms step:1360/3200 train_loss:3.6617 train_time:205121ms step_avg:151.94ms step:1361/3200 train_loss:3.5517 train_time:205271ms step_avg:151.94ms step:1362/3200 train_loss:3.4079 train_time:205422ms step_avg:151.94ms step:1363/3200 train_loss:3.5892 train_time:205572ms step_avg:151.94ms step:1364/3200 train_loss:3.4780 train_time:205723ms step_avg:151.94ms step:1365/3200 train_loss:3.5006 train_time:205873ms step_avg:151.94ms step:1366/3200 train_loss:3.5268 train_time:206023ms step_avg:151.93ms step:1367/3200 train_loss:3.6277 train_time:206173ms step_avg:151.93ms step:1368/3200 train_loss:3.6105 train_time:206323ms step_avg:151.93ms step:1369/3200 train_loss:3.5671 train_time:206474ms step_avg:151.93ms step:1370/3200 train_loss:3.4811 train_time:206623ms step_avg:151.93ms step:1371/3200 train_loss:3.8055 train_time:206774ms step_avg:151.93ms step:1372/3200 train_loss:3.5397 train_time:206924ms step_avg:151.93ms step:1373/3200 train_loss:3.5726 train_time:207075ms step_avg:151.93ms step:1374/3200 train_loss:3.5710 train_time:207224ms step_avg:151.92ms step:1375/3200 train_loss:3.3647 train_time:207375ms step_avg:151.92ms step:1375/3200 val_loss:3.5310 train_time:207422ms step_avg:151.96ms step:1376/3200 train_loss:3.7761 train_time:207536ms step_avg:151.93ms step:1377/3200 train_loss:3.5545 train_time:207687ms step_avg:151.93ms step:1378/3200 
train_loss:3.6929 train_time:207836ms step_avg:151.93ms step:1379/3200 train_loss:3.7414 train_time:207985ms step_avg:151.92ms step:1380/3200 train_loss:3.4168 train_time:208133ms step_avg:151.92ms step:1381/3200 train_loss:3.5308 train_time:208281ms step_avg:151.92ms step:1382/3200 train_loss:4.0083 train_time:208430ms step_avg:151.92ms step:1383/3200 train_loss:3.4504 train_time:208587ms step_avg:151.92ms step:1384/3200 train_loss:3.6087 train_time:208739ms step_avg:151.92ms step:1385/3200 train_loss:3.6879 train_time:208889ms step_avg:151.92ms step:1386/3200 train_loss:3.5945 train_time:209037ms step_avg:151.92ms step:1387/3200 train_loss:3.5805 train_time:209188ms step_avg:151.92ms step:1388/3200 train_loss:3.4173 train_time:209337ms step_avg:151.91ms step:1389/3200 train_loss:3.5603 train_time:209489ms step_avg:151.91ms step:1390/3200 train_loss:3.5345 train_time:209642ms step_avg:151.91ms step:1391/3200 train_loss:3.7924 train_time:209792ms step_avg:151.91ms step:1392/3200 train_loss:3.5084 train_time:209942ms step_avg:151.91ms step:1393/3200 train_loss:3.4992 train_time:210091ms step_avg:151.91ms step:1394/3200 train_loss:3.4587 train_time:210242ms step_avg:151.91ms step:1395/3200 train_loss:3.7450 train_time:210390ms step_avg:151.91ms step:1396/3200 train_loss:3.6388 train_time:210542ms step_avg:151.91ms step:1397/3200 train_loss:3.6448 train_time:210693ms step_avg:151.91ms step:1398/3200 train_loss:3.5094 train_time:210843ms step_avg:151.90ms step:1399/3200 train_loss:3.4852 train_time:210993ms step_avg:151.90ms step:1400/3200 train_loss:3.5413 train_time:211144ms step_avg:151.90ms step:1401/3200 train_loss:3.5303 train_time:211293ms step_avg:151.90ms step:1402/3200 train_loss:3.5543 train_time:211444ms step_avg:151.90ms step:1403/3200 train_loss:3.5118 train_time:211594ms step_avg:151.90ms step:1404/3200 train_loss:3.7471 train_time:211745ms step_avg:151.90ms step:1405/3200 train_loss:3.4925 train_time:211894ms step_avg:151.90ms step:1406/3200 
train_loss:3.5327 train_time:212046ms step_avg:151.90ms step:1407/3200 train_loss:3.5253 train_time:212195ms step_avg:151.89ms step:1408/3200 train_loss:3.4003 train_time:212347ms step_avg:151.89ms step:1409/3200 train_loss:3.5185 train_time:212496ms step_avg:151.89ms step:1410/3200 train_loss:3.4925 train_time:212647ms step_avg:151.89ms step:1411/3200 train_loss:3.5010 train_time:212796ms step_avg:151.89ms step:1412/3200 train_loss:3.5826 train_time:212949ms step_avg:151.89ms step:1413/3200 train_loss:3.5256 train_time:213096ms step_avg:151.89ms step:1414/3200 train_loss:3.5723 train_time:213249ms step_avg:151.89ms step:1415/3200 train_loss:3.5606 train_time:213398ms step_avg:151.88ms step:1416/3200 train_loss:3.6364 train_time:213551ms step_avg:151.89ms step:1417/3200 train_loss:3.4409 train_time:213699ms step_avg:151.88ms step:1418/3200 train_loss:3.5018 train_time:213851ms step_avg:151.88ms step:1419/3200 train_loss:3.5966 train_time:213999ms step_avg:151.88ms step:1420/3200 train_loss:3.6194 train_time:214149ms step_avg:151.88ms step:1421/3200 train_loss:3.6051 train_time:214299ms step_avg:151.88ms step:1422/3200 train_loss:3.5926 train_time:214450ms step_avg:151.88ms step:1423/3200 train_loss:3.5713 train_time:214599ms step_avg:151.87ms step:1424/3200 train_loss:3.5502 train_time:214750ms step_avg:151.87ms step:1425/3200 train_loss:3.5576 train_time:214899ms step_avg:151.87ms step:1426/3200 train_loss:3.4296 train_time:215051ms step_avg:151.87ms step:1427/3200 train_loss:3.5433 train_time:215198ms step_avg:151.87ms step:1428/3200 train_loss:3.4881 train_time:215351ms step_avg:151.87ms step:1429/3200 train_loss:3.5995 train_time:215500ms step_avg:151.87ms step:1430/3200 train_loss:3.5575 train_time:215651ms step_avg:151.87ms step:1431/3200 train_loss:3.4917 train_time:215801ms step_avg:151.87ms step:1432/3200 train_loss:3.5394 train_time:215952ms step_avg:151.86ms step:1433/3200 train_loss:3.5746 train_time:216101ms step_avg:151.86ms step:1434/3200 
train_loss:3.4454 train_time:216252ms step_avg:151.86ms step:1435/3200 train_loss:3.5481 train_time:216402ms step_avg:151.86ms step:1436/3200 train_loss:3.3685 train_time:216552ms step_avg:151.86ms step:1437/3200 train_loss:3.4387 train_time:216702ms step_avg:151.86ms step:1438/3200 train_loss:3.6246 train_time:216853ms step_avg:151.86ms step:1439/3200 train_loss:3.5883 train_time:217002ms step_avg:151.86ms step:1440/3200 train_loss:3.5338 train_time:217153ms step_avg:151.86ms step:1441/3200 train_loss:3.3925 train_time:217302ms step_avg:151.85ms step:1442/3200 train_loss:3.5589 train_time:217452ms step_avg:151.85ms step:1443/3200 train_loss:3.6260 train_time:217602ms step_avg:151.85ms step:1444/3200 train_loss:3.7052 train_time:217753ms step_avg:151.85ms step:1445/3200 train_loss:3.6614 train_time:217904ms step_avg:151.85ms step:1446/3200 train_loss:3.5514 train_time:218054ms step_avg:151.85ms step:1447/3200 train_loss:3.4254 train_time:218205ms step_avg:151.85ms step:1448/3200 train_loss:3.4984 train_time:218354ms step_avg:151.85ms step:1449/3200 train_loss:3.5163 train_time:218505ms step_avg:151.85ms step:1450/3200 train_loss:3.6293 train_time:218654ms step_avg:151.84ms step:1451/3200 train_loss:3.6199 train_time:218804ms step_avg:151.84ms step:1452/3200 train_loss:3.4372 train_time:218955ms step_avg:151.84ms step:1453/3200 train_loss:3.5552 train_time:219108ms step_avg:151.84ms step:1454/3200 train_loss:3.4676 train_time:219257ms step_avg:151.84ms step:1455/3200 train_loss:3.5048 train_time:219409ms step_avg:151.84ms step:1456/3200 train_loss:3.5540 train_time:219557ms step_avg:151.84ms step:1457/3200 train_loss:3.4814 train_time:219709ms step_avg:151.84ms step:1458/3200 train_loss:3.3785 train_time:219858ms step_avg:151.84ms step:1459/3200 train_loss:3.6299 train_time:220010ms step_avg:151.84ms step:1460/3200 train_loss:3.4929 train_time:220160ms step_avg:151.83ms step:1461/3200 train_loss:3.5431 train_time:220310ms step_avg:151.83ms step:1462/3200 
train_loss:3.6674 train_time:220462ms step_avg:151.83ms step:1463/3200 train_loss:3.4919 train_time:220612ms step_avg:151.83ms step:1464/3200 train_loss:3.6833 train_time:220760ms step_avg:151.83ms step:1465/3200 train_loss:3.5723 train_time:220912ms step_avg:151.83ms step:1466/3200 train_loss:3.5846 train_time:221060ms step_avg:151.83ms step:1467/3200 train_loss:3.4956 train_time:221211ms step_avg:151.83ms step:1468/3200 train_loss:3.6547 train_time:221360ms step_avg:151.82ms step:1469/3200 train_loss:3.5187 train_time:221512ms step_avg:151.82ms step:1470/3200 train_loss:3.4935 train_time:221662ms step_avg:151.82ms step:1471/3200 train_loss:3.5405 train_time:221812ms step_avg:151.82ms step:1472/3200 train_loss:3.4635 train_time:221962ms step_avg:151.82ms step:1473/3200 train_loss:3.5552 train_time:222112ms step_avg:151.82ms step:1474/3200 train_loss:3.6485 train_time:222262ms step_avg:151.82ms step:1475/3200 train_loss:3.5259 train_time:222413ms step_avg:151.82ms step:1476/3200 train_loss:3.3549 train_time:222564ms step_avg:151.82ms step:1477/3200 train_loss:3.4811 train_time:222714ms step_avg:151.82ms step:1478/3200 train_loss:3.4520 train_time:222865ms step_avg:151.82ms step:1479/3200 train_loss:3.5333 train_time:223014ms step_avg:151.81ms step:1480/3200 train_loss:3.6172 train_time:223165ms step_avg:151.81ms step:1481/3200 train_loss:3.4886 train_time:223315ms step_avg:151.81ms step:1482/3200 train_loss:3.6649 train_time:223467ms step_avg:151.81ms step:1483/3200 train_loss:3.5881 train_time:223616ms step_avg:151.81ms step:1484/3200 train_loss:3.4953 train_time:223768ms step_avg:151.81ms step:1485/3200 train_loss:3.4859 train_time:223917ms step_avg:151.81ms step:1486/3200 train_loss:3.4873 train_time:224069ms step_avg:151.81ms step:1487/3200 train_loss:3.4660 train_time:224219ms step_avg:151.81ms step:1488/3200 train_loss:3.5478 train_time:224370ms step_avg:151.81ms step:1489/3200 train_loss:3.4556 train_time:224521ms step_avg:151.81ms step:1490/3200 
train_loss:3.5441 train_time:224671ms step_avg:151.80ms step:1491/3200 train_loss:3.4810 train_time:224821ms step_avg:151.80ms step:1492/3200 train_loss:3.4062 train_time:224971ms step_avg:151.80ms step:1493/3200 train_loss:3.4821 train_time:225121ms step_avg:151.80ms step:1494/3200 train_loss:3.6543 train_time:225273ms step_avg:151.80ms step:1495/3200 train_loss:3.5075 train_time:225423ms step_avg:151.80ms step:1496/3200 train_loss:3.2631 train_time:225574ms step_avg:151.80ms step:1497/3200 train_loss:3.5698 train_time:225725ms step_avg:151.80ms step:1498/3200 train_loss:3.5316 train_time:225874ms step_avg:151.80ms step:1499/3200 train_loss:3.5796 train_time:226025ms step_avg:151.80ms step:1500/3200 train_loss:3.5317 train_time:226176ms step_avg:151.80ms step:1500/3200 val_loss:3.5088 train_time:226223ms step_avg:151.83ms step:1501/3200 train_loss:3.5153 train_time:226336ms step_avg:151.80ms step:1502/3200 train_loss:3.3110 train_time:226485ms step_avg:151.80ms step:1503/3200 train_loss:3.5886 train_time:226634ms step_avg:151.80ms step:1504/3200 train_loss:3.4561 train_time:226781ms step_avg:151.79ms step:1505/3200 train_loss:3.4664 train_time:226931ms step_avg:151.79ms step:1506/3200 train_loss:3.4294 train_time:227079ms step_avg:151.79ms step:1507/3200 train_loss:3.5111 train_time:227230ms step_avg:151.79ms step:1508/3200 train_loss:3.4269 train_time:227382ms step_avg:151.79ms step:1509/3200 train_loss:3.7333 train_time:227534ms step_avg:151.79ms step:1510/3200 train_loss:3.4824 train_time:227683ms step_avg:151.79ms step:1511/3200 train_loss:3.4923 train_time:227834ms step_avg:151.79ms step:1512/3200 train_loss:3.6144 train_time:228139ms step_avg:151.89ms step:1513/3200 train_loss:3.6436 train_time:228297ms step_avg:151.89ms step:1514/3200 train_loss:3.5006 train_time:228445ms step_avg:151.89ms step:1515/3200 train_loss:3.3497 train_time:228593ms step_avg:151.89ms step:1516/3200 train_loss:3.4611 train_time:228741ms step_avg:151.89ms step:1517/3200 
train_loss:3.4687 train_time:228891ms step_avg:151.89ms step:1518/3200 train_loss:3.5441 train_time:229041ms step_avg:151.88ms step:1519/3200 train_loss:3.4264 train_time:229195ms step_avg:151.89ms step:1520/3200 train_loss:3.7258 train_time:229527ms step_avg:152.00ms step:1521/3200 train_loss:3.3821 train_time:229686ms step_avg:152.01ms step:1522/3200 train_loss:3.4453 train_time:229835ms step_avg:152.01ms step:1523/3200 train_loss:3.5934 train_time:229984ms step_avg:152.01ms step:1524/3200 train_loss:3.4507 train_time:230133ms step_avg:152.00ms step:1525/3200 train_loss:3.5502 train_time:230281ms step_avg:152.00ms step:1526/3200 train_loss:3.5426 train_time:230432ms step_avg:152.00ms step:1527/3200 train_loss:3.5056 train_time:230583ms step_avg:152.00ms step:1528/3200 train_loss:3.5073 train_time:230735ms step_avg:152.00ms step:1529/3200 train_loss:3.6523 train_time:230884ms step_avg:152.00ms step:1530/3200 train_loss:3.6225 train_time:231033ms step_avg:152.00ms step:1531/3200 train_loss:3.4576 train_time:231181ms step_avg:151.99ms step:1532/3200 train_loss:3.4148 train_time:231332ms step_avg:151.99ms step:1533/3200 train_loss:3.5851 train_time:231481ms step_avg:151.99ms step:1534/3200 train_loss:3.5232 train_time:231634ms step_avg:151.99ms step:1535/3200 train_loss:3.5124 train_time:231784ms step_avg:151.99ms step:1536/3200 train_loss:3.5105 train_time:231935ms step_avg:151.99ms step:1537/3200 train_loss:3.4468 train_time:232084ms step_avg:151.99ms step:1538/3200 train_loss:3.5014 train_time:232234ms step_avg:151.99ms step:1539/3200 train_loss:3.6771 train_time:232381ms step_avg:151.98ms step:1540/3200 train_loss:3.6137 train_time:232533ms step_avg:151.98ms step:1541/3200 train_loss:3.5184 train_time:232682ms step_avg:151.98ms step:1542/3200 train_loss:3.4696 train_time:232834ms step_avg:151.98ms step:1543/3200 train_loss:3.4713 train_time:232983ms step_avg:151.98ms step:1544/3200 train_loss:3.4304 train_time:233134ms step_avg:151.98ms step:1545/3200 
train_loss:3.5204 train_time:233282ms step_avg:151.98ms step:1546/3200 train_loss:3.4879 train_time:233432ms step_avg:151.97ms step:1547/3200 train_loss:3.4741 train_time:233582ms step_avg:151.97ms step:1548/3200 train_loss:3.4291 train_time:233734ms step_avg:151.97ms step:1549/3200 train_loss:3.4669 train_time:233882ms step_avg:151.97ms step:1550/3200 train_loss:3.5805 train_time:234033ms step_avg:151.97ms step:1551/3200 train_loss:3.5067 train_time:234182ms step_avg:151.97ms step:1552/3200 train_loss:3.4450 train_time:234333ms step_avg:151.97ms step:1553/3200 train_loss:3.4443 train_time:234482ms step_avg:151.96ms step:1554/3200 train_loss:3.4365 train_time:234634ms step_avg:151.97ms step:1555/3200 train_loss:3.5610 train_time:234783ms step_avg:151.96ms step:1556/3200 train_loss:3.5635 train_time:234934ms step_avg:151.96ms step:1557/3200 train_loss:3.4968 train_time:235083ms step_avg:151.96ms step:1558/3200 train_loss:3.5506 train_time:235233ms step_avg:151.96ms step:1559/3200 train_loss:3.4781 train_time:235382ms step_avg:151.96ms step:1560/3200 train_loss:3.3940 train_time:235534ms step_avg:151.96ms step:1561/3200 train_loss:3.6359 train_time:235683ms step_avg:151.96ms step:1562/3200 train_loss:3.4542 train_time:235834ms step_avg:151.95ms step:1563/3200 train_loss:3.4334 train_time:235981ms step_avg:151.95ms step:1564/3200 train_loss:3.5590 train_time:236132ms step_avg:151.95ms step:1565/3200 train_loss:3.3819 train_time:236280ms step_avg:151.95ms step:1566/3200 train_loss:3.4366 train_time:236433ms step_avg:151.95ms step:1567/3200 train_loss:3.5889 train_time:236582ms step_avg:151.95ms step:1568/3200 train_loss:3.4662 train_time:236734ms step_avg:151.95ms step:1569/3200 train_loss:3.4551 train_time:236883ms step_avg:151.95ms step:1570/3200 train_loss:3.5524 train_time:237034ms step_avg:151.94ms step:1571/3200 train_loss:3.5553 train_time:237182ms step_avg:151.94ms step:1572/3200 train_loss:3.3852 train_time:237333ms step_avg:151.94ms step:1573/3200 
train_loss:3.4190 train_time:237483ms step_avg:151.94ms step:1574/3200 train_loss:3.5332 train_time:237635ms step_avg:151.94ms step:1575/3200 train_loss:3.4040 train_time:237785ms step_avg:151.94ms step:1576/3200 train_loss:3.5548 train_time:237936ms step_avg:151.94ms step:1577/3200 train_loss:3.4562 train_time:238085ms step_avg:151.94ms step:1578/3200 train_loss:3.5118 train_time:238235ms step_avg:151.94ms step:1579/3200 train_loss:3.4894 train_time:238383ms step_avg:151.93ms step:1580/3200 train_loss:3.4555 train_time:238535ms step_avg:151.93ms step:1581/3200 train_loss:3.4279 train_time:238686ms step_avg:151.93ms step:1582/3200 train_loss:3.6746 train_time:238836ms step_avg:151.93ms step:1583/3200 train_loss:3.4456 train_time:238985ms step_avg:151.93ms step:1584/3200 train_loss:3.5981 train_time:239135ms step_avg:151.93ms step:1585/3200 train_loss:3.4266 train_time:239284ms step_avg:151.93ms step:1586/3200 train_loss:3.5873 train_time:239434ms step_avg:151.93ms step:1587/3200 train_loss:3.3780 train_time:239583ms step_avg:151.92ms step:1588/3200 train_loss:3.5678 train_time:239735ms step_avg:151.92ms step:1589/3200 train_loss:3.4853 train_time:239884ms step_avg:151.92ms step:1590/3200 train_loss:3.6300 train_time:240035ms step_avg:151.92ms step:1591/3200 train_loss:3.4513 train_time:240183ms step_avg:151.92ms step:1592/3200 train_loss:3.4670 train_time:240333ms step_avg:151.92ms step:1593/3200 train_loss:3.5372 train_time:240482ms step_avg:151.92ms step:1594/3200 train_loss:3.5096 train_time:240634ms step_avg:151.92ms step:1595/3200 train_loss:3.4840 train_time:240783ms step_avg:151.91ms step:1596/3200 train_loss:3.6305 train_time:240934ms step_avg:151.91ms step:1597/3200 train_loss:3.3590 train_time:241083ms step_avg:151.91ms step:1598/3200 train_loss:3.5234 train_time:241234ms step_avg:151.91ms step:1599/3200 train_loss:3.5648 train_time:241382ms step_avg:151.91ms step:1600/3200 train_loss:3.6045 train_time:241533ms step_avg:151.91ms step:1601/3200 
train_loss:3.4641 train_time:241684ms step_avg:151.91ms step:1602/3200 train_loss:3.7560 train_time:241836ms step_avg:151.91ms step:1603/3200 train_loss:3.6424 train_time:241986ms step_avg:151.91ms step:1604/3200 train_loss:3.4257 train_time:242138ms step_avg:151.91ms step:1605/3200 train_loss:3.4578 train_time:242288ms step_avg:151.90ms step:1606/3200 train_loss:3.3464 train_time:242437ms step_avg:151.90ms step:1607/3200 train_loss:3.6655 train_time:242587ms step_avg:151.90ms step:1608/3200 train_loss:3.4695 train_time:242737ms step_avg:151.90ms step:1609/3200 train_loss:3.4929 train_time:242887ms step_avg:151.90ms step:1610/3200 train_loss:3.4492 train_time:243038ms step_avg:151.90ms step:1611/3200 train_loss:4.0471 train_time:243190ms step_avg:151.90ms step:1612/3200 train_loss:3.6845 train_time:243339ms step_avg:151.90ms step:1613/3200 train_loss:3.5927 train_time:243491ms step_avg:151.90ms step:1614/3200 train_loss:3.4600 train_time:243641ms step_avg:151.90ms step:1615/3200 train_loss:3.4958 train_time:243792ms step_avg:151.90ms step:1616/3200 train_loss:3.4985 train_time:243942ms step_avg:151.89ms step:1617/3200 train_loss:3.4609 train_time:244093ms step_avg:151.89ms step:1618/3200 train_loss:3.5365 train_time:244242ms step_avg:151.89ms step:1619/3200 train_loss:3.4832 train_time:244393ms step_avg:151.89ms step:1620/3200 train_loss:3.3789 train_time:244541ms step_avg:151.89ms step:1621/3200 train_loss:3.6455 train_time:244694ms step_avg:151.89ms step:1622/3200 train_loss:3.5635 train_time:244845ms step_avg:151.89ms step:1623/3200 train_loss:3.3464 train_time:244995ms step_avg:151.89ms step:1624/3200 train_loss:3.4660 train_time:245144ms step_avg:151.89ms step:1625/3200 train_loss:3.4159 train_time:245294ms step_avg:151.88ms step:1625/3200 val_loss:3.4944 train_time:245340ms step_avg:151.91ms step:1626/3200 train_loss:3.5021 train_time:245451ms step_avg:151.89ms step:1627/3200 train_loss:3.4661 train_time:245603ms step_avg:151.89ms step:1628/3200 
train_loss:3.4310 train_time:245751ms step_avg:151.89ms step:1629/3200 train_loss:3.5395 train_time:245901ms step_avg:151.88ms step:1630/3200 train_loss:3.4342 train_time:246048ms step_avg:151.88ms step:1631/3200 train_loss:3.4919 train_time:246197ms step_avg:151.88ms step:1632/3200 train_loss:3.3750 train_time:246346ms step_avg:151.88ms step:1633/3200 train_loss:3.3414 train_time:246501ms step_avg:151.88ms step:1634/3200 train_loss:3.4995 train_time:246650ms step_avg:151.88ms step:1635/3200 train_loss:3.4905 train_time:246802ms step_avg:151.88ms step:1636/3200 train_loss:3.4335 train_time:246950ms step_avg:151.88ms step:1637/3200 train_loss:3.5091 train_time:247101ms step_avg:151.88ms step:1638/3200 train_loss:3.5586 train_time:247249ms step_avg:151.87ms step:1639/3200 train_loss:3.5968 train_time:247401ms step_avg:151.87ms step:1640/3200 train_loss:3.7534 train_time:247550ms step_avg:151.87ms step:1641/3200 train_loss:3.5759 train_time:247702ms step_avg:151.87ms step:1642/3200 train_loss:3.4994 train_time:247852ms step_avg:151.87ms step:1643/3200 train_loss:3.5827 train_time:248003ms step_avg:151.87ms step:1644/3200 train_loss:3.4831 train_time:248153ms step_avg:151.87ms step:1645/3200 train_loss:3.4972 train_time:248302ms step_avg:151.87ms step:1646/3200 train_loss:3.4940 train_time:248452ms step_avg:151.87ms step:1647/3200 train_loss:3.2676 train_time:248603ms step_avg:151.87ms step:1648/3200 train_loss:3.5377 train_time:248753ms step_avg:151.86ms step:1649/3200 train_loss:3.3992 train_time:248903ms step_avg:151.86ms step:1650/3200 train_loss:3.4777 train_time:249054ms step_avg:151.86ms step:1651/3200 train_loss:3.4500 train_time:249203ms step_avg:151.86ms step:1652/3200 train_loss:3.5230 train_time:249352ms step_avg:151.86ms step:1653/3200 train_loss:3.4518 train_time:249503ms step_avg:151.86ms step:1654/3200 train_loss:3.5731 train_time:249651ms step_avg:151.86ms step:1655/3200 train_loss:3.5634 train_time:249803ms step_avg:151.86ms step:1656/3200 
train_loss:3.3923 train_time:249953ms step_avg:151.85ms step:1657/3200 train_loss:3.5526 train_time:250104ms step_avg:151.85ms step:1658/3200 train_loss:3.4411 train_time:250253ms step_avg:151.85ms step:1659/3200 train_loss:3.4207 train_time:250403ms step_avg:151.85ms step:1660/3200 train_loss:3.5099 train_time:250553ms step_avg:151.85ms step:1661/3200 train_loss:3.5307 train_time:250703ms step_avg:151.85ms step:1662/3200 train_loss:3.4421 train_time:250853ms step_avg:151.85ms step:1663/3200 train_loss:3.5402 train_time:251003ms step_avg:151.85ms step:1664/3200 train_loss:3.5454 train_time:251152ms step_avg:151.85ms step:1665/3200 train_loss:3.5737 train_time:251303ms step_avg:151.84ms step:1666/3200 train_loss:3.5481 train_time:251453ms step_avg:151.84ms step:1667/3200 train_loss:3.6934 train_time:251604ms step_avg:151.84ms step:1668/3200 train_loss:3.3968 train_time:251754ms step_avg:151.84ms step:1669/3200 train_loss:3.4806 train_time:251904ms step_avg:151.84ms step:1670/3200 train_loss:3.3987 train_time:252054ms step_avg:151.84ms step:1671/3200 train_loss:3.4072 train_time:252205ms step_avg:151.84ms step:1672/3200 train_loss:3.5665 train_time:252356ms step_avg:151.84ms step:1673/3200 train_loss:3.7477 train_time:252506ms step_avg:151.84ms step:1674/3200 train_loss:3.4640 train_time:252658ms step_avg:151.84ms step:1675/3200 train_loss:3.4505 train_time:252806ms step_avg:151.84ms step:1676/3200 train_loss:3.3383 train_time:252956ms step_avg:151.83ms step:1677/3200 train_loss:3.5415 train_time:253105ms step_avg:151.83ms step:1678/3200 train_loss:3.4569 train_time:253256ms step_avg:151.83ms step:1679/3200 train_loss:3.4851 train_time:253407ms step_avg:151.83ms step:1680/3200 train_loss:3.4733 train_time:253559ms step_avg:151.83ms step:1681/3200 train_loss:3.2998 train_time:253708ms step_avg:151.83ms step:1682/3200 train_loss:3.4840 train_time:253860ms step_avg:151.83ms step:1683/3200 train_loss:3.4925 train_time:254010ms step_avg:151.83ms step:1684/3200 
train_loss:3.5264 train_time:254162ms step_avg:151.83ms step:1685/3200 train_loss:3.5266 train_time:254311ms step_avg:151.83ms step:1686/3200 train_loss:3.4424 train_time:254462ms step_avg:151.83ms step:1687/3200 train_loss:3.5467 train_time:254610ms step_avg:151.82ms step:1688/3200 train_loss:3.4298 train_time:254762ms step_avg:151.82ms step:1689/3200 train_loss:3.5130 train_time:254910ms step_avg:151.82ms step:1690/3200 train_loss:3.4310 train_time:255062ms step_avg:151.82ms step:1691/3200 train_loss:3.3277 train_time:255211ms step_avg:151.82ms step:1692/3200 train_loss:3.4810 train_time:255363ms step_avg:151.82ms step:1693/3200 train_loss:3.4762 train_time:255511ms step_avg:151.82ms step:1694/3200 train_loss:3.3904 train_time:255663ms step_avg:151.82ms step:1695/3200 train_loss:3.8302 train_time:255812ms step_avg:151.82ms step:1696/3200 train_loss:3.5450 train_time:255962ms step_avg:151.82ms step:1697/3200 train_loss:3.5329 train_time:256110ms step_avg:151.81ms step:1698/3200 train_loss:3.4374 train_time:256261ms step_avg:151.81ms step:1699/3200 train_loss:3.3453 train_time:256410ms step_avg:151.81ms step:1700/3200 train_loss:3.4361 train_time:256561ms step_avg:151.81ms step:1701/3200 train_loss:3.4307 train_time:256871ms step_avg:151.90ms step:1702/3200 train_loss:3.5085 train_time:257025ms step_avg:151.91ms step:1703/3200 train_loss:3.4299 train_time:257175ms step_avg:151.90ms step:1704/3200 train_loss:3.6380 train_time:257323ms step_avg:151.90ms step:1705/3200 train_loss:3.3958 train_time:257471ms step_avg:151.90ms step:1706/3200 train_loss:3.6192 train_time:257621ms step_avg:151.90ms step:1707/3200 train_loss:3.4610 train_time:257771ms step_avg:151.90ms step:1708/3200 train_loss:3.2543 train_time:257923ms step_avg:151.90ms step:1709/3200 train_loss:3.5805 train_time:258074ms step_avg:151.90ms step:1710/3200 train_loss:3.4886 train_time:258390ms step_avg:151.99ms step:1711/3200 train_loss:3.4746 train_time:258547ms step_avg:152.00ms step:1712/3200 
train_loss:3.4742 train_time:258697ms step_avg:152.00ms step:1713/3200 train_loss:3.5100 train_time:258844ms step_avg:151.99ms step:1714/3200 train_loss:3.5325 train_time:258993ms step_avg:151.99ms step:1715/3200 train_loss:3.4519 train_time:259141ms step_avg:151.99ms step:1716/3200 train_loss:3.4641 train_time:259293ms step_avg:151.99ms step:1717/3200 train_loss:3.2960 train_time:259448ms step_avg:151.99ms step:1718/3200 train_loss:3.4366 train_time:259600ms step_avg:151.99ms step:1719/3200 train_loss:3.4543 train_time:259749ms step_avg:151.99ms step:1720/3200 train_loss:3.4034 train_time:259901ms step_avg:151.99ms step:1721/3200 train_loss:3.5682 train_time:260050ms step_avg:151.99ms step:1722/3200 train_loss:3.3718 train_time:260200ms step_avg:151.99ms step:1723/3200 train_loss:3.5115 train_time:260350ms step_avg:151.98ms step:1724/3200 train_loss:3.5930 train_time:260503ms step_avg:151.99ms step:1725/3200 train_loss:3.4455 train_time:260651ms step_avg:151.98ms step:1726/3200 train_loss:3.6711 train_time:260802ms step_avg:151.98ms step:1727/3200 train_loss:3.4576 train_time:260950ms step_avg:151.98ms step:1728/3200 train_loss:3.5217 train_time:261101ms step_avg:151.98ms step:1729/3200 train_loss:3.4888 train_time:261250ms step_avg:151.98ms step:1730/3200 train_loss:3.5010 train_time:261403ms step_avg:151.98ms step:1731/3200 train_loss:3.8580 train_time:261552ms step_avg:151.98ms step:1732/3200 train_loss:3.4849 train_time:261703ms step_avg:151.98ms step:1733/3200 train_loss:3.6073 train_time:261853ms step_avg:151.97ms step:1734/3200 train_loss:3.3974 train_time:262004ms step_avg:151.97ms step:1735/3200 train_loss:3.4383 train_time:262154ms step_avg:151.97ms step:1736/3200 train_loss:3.4593 train_time:262304ms step_avg:151.97ms step:1737/3200 train_loss:3.4366 train_time:262453ms step_avg:151.97ms step:1738/3200 train_loss:3.5818 train_time:262603ms step_avg:151.97ms step:1739/3200 train_loss:3.4419 train_time:262754ms step_avg:151.97ms step:1740/3200 
train_loss:3.4941 train_time:262904ms step_avg:151.97ms step:1741/3200 train_loss:3.5553 train_time:263054ms step_avg:151.97ms step:1742/3200 train_loss:3.3628 train_time:263204ms step_avg:151.97ms step:1743/3200 train_loss:3.2479 train_time:263353ms step_avg:151.96ms step:1744/3200 train_loss:3.1775 train_time:263504ms step_avg:151.96ms step:1745/3200 train_loss:3.4765 train_time:263654ms step_avg:151.96ms step:1746/3200 train_loss:3.4910 train_time:263805ms step_avg:151.96ms step:1747/3200 train_loss:3.4574 train_time:263955ms step_avg:151.96ms step:1748/3200 train_loss:3.4759 train_time:264104ms step_avg:151.96ms step:1749/3200 train_loss:3.7131 train_time:264253ms step_avg:151.96ms step:1750/3200 train_loss:3.4218 train_time:264404ms step_avg:151.96ms step:1750/3200 val_loss:3.4746 train_time:264450ms step_avg:151.98ms step:1751/3200 train_loss:3.4963 train_time:264558ms step_avg:151.96ms step:1752/3200 train_loss:3.4865 train_time:264711ms step_avg:151.96ms step:1753/3200 train_loss:3.1176 train_time:264859ms step_avg:151.96ms step:1754/3200 train_loss:3.2438 train_time:265009ms step_avg:151.95ms step:1755/3200 train_loss:3.3418 train_time:265156ms step_avg:151.95ms step:1756/3200 train_loss:3.2912 train_time:265306ms step_avg:151.95ms step:1757/3200 train_loss:3.4484 train_time:265456ms step_avg:151.95ms step:1758/3200 train_loss:3.3314 train_time:265611ms step_avg:151.95ms step:1759/3200 train_loss:3.3264 train_time:265761ms step_avg:151.95ms step:1760/3200 train_loss:4.3794 train_time:265911ms step_avg:151.95ms step:1761/3200 train_loss:3.4608 train_time:266060ms step_avg:151.95ms step:1762/3200 train_loss:3.5028 train_time:266209ms step_avg:151.95ms step:1763/3200 train_loss:3.4928 train_time:266358ms step_avg:151.94ms step:1764/3200 train_loss:3.5149 train_time:266511ms step_avg:151.94ms step:1765/3200 train_loss:3.4268 train_time:266662ms step_avg:151.94ms step:1766/3200 train_loss:3.4718 train_time:266813ms step_avg:151.94ms step:1767/3200 
train_loss:3.4814 train_time:266963ms step_avg:151.94ms step:1768/3200 train_loss:3.7285 train_time:267112ms step_avg:151.94ms step:1769/3200 train_loss:3.4620 train_time:267261ms step_avg:151.94ms step:1770/3200 train_loss:3.5256 train_time:267411ms step_avg:151.94ms step:1771/3200 train_loss:3.9384 train_time:267561ms step_avg:151.94ms step:1772/3200 train_loss:3.4624 train_time:267713ms step_avg:151.94ms step:1773/3200 train_loss:3.3681 train_time:267864ms step_avg:151.94ms step:1774/3200 train_loss:3.6132 train_time:268013ms step_avg:151.94ms step:1775/3200 train_loss:3.3781 train_time:268164ms step_avg:151.93ms step:1776/3200 train_loss:3.5267 train_time:268313ms step_avg:151.93ms step:1777/3200 train_loss:3.5744 train_time:268463ms step_avg:151.93ms step:1778/3200 train_loss:3.6632 train_time:268612ms step_avg:151.93ms step:1779/3200 train_loss:3.4654 train_time:268764ms step_avg:151.93ms step:1780/3200 train_loss:3.7669 train_time:268914ms step_avg:151.93ms step:1781/3200 train_loss:3.5394 train_time:269065ms step_avg:151.93ms step:1782/3200 train_loss:3.5510 train_time:269214ms step_avg:151.93ms step:1783/3200 train_loss:3.3385 train_time:269362ms step_avg:151.92ms step:1784/3200 train_loss:3.4283 train_time:269514ms step_avg:151.92ms step:1785/3200 train_loss:3.5749 train_time:269664ms step_avg:151.92ms step:1786/3200 train_loss:3.4600 train_time:269815ms step_avg:151.92ms step:1787/3200 train_loss:3.6275 train_time:269966ms step_avg:151.92ms step:1788/3200 train_loss:3.4349 train_time:270115ms step_avg:151.92ms step:1789/3200 train_loss:3.4129 train_time:270265ms step_avg:151.92ms step:1790/3200 train_loss:3.5592 train_time:270414ms step_avg:151.92ms step:1791/3200 train_loss:3.4618 train_time:270566ms step_avg:151.92ms step:1792/3200 train_loss:3.4106 train_time:270715ms step_avg:151.92ms step:1793/3200 train_loss:3.5433 train_time:270867ms step_avg:151.92ms step:1794/3200 train_loss:3.4171 train_time:271017ms step_avg:151.92ms step:1795/3200 
train_loss:3.4061 train_time:271169ms step_avg:151.92ms step:1796/3200 train_loss:3.4700 train_time:271318ms step_avg:151.91ms step:1797/3200 train_loss:3.4264 train_time:271470ms step_avg:151.91ms step:1798/3200 train_loss:3.5682 train_time:271619ms step_avg:151.91ms step:1799/3200 train_loss:3.4491 train_time:271772ms step_avg:151.91ms step:1800/3200 train_loss:3.5331 train_time:271922ms step_avg:151.91ms step:1801/3200 train_loss:3.4518 train_time:272071ms step_avg:151.91ms step:1802/3200 train_loss:3.4926 train_time:272222ms step_avg:151.91ms step:1803/3200 train_loss:3.4017 train_time:272372ms step_avg:151.91ms step:1804/3200 train_loss:3.3320 train_time:272523ms step_avg:151.91ms step:1805/3200 train_loss:3.5887 train_time:272672ms step_avg:151.91ms step:1806/3200 train_loss:3.5065 train_time:272823ms step_avg:151.91ms step:1807/3200 train_loss:3.5207 train_time:272973ms step_avg:151.90ms step:1808/3200 train_loss:3.6201 train_time:273123ms step_avg:151.90ms step:1809/3200 train_loss:3.4226 train_time:273273ms step_avg:151.90ms step:1810/3200 train_loss:3.5246 train_time:273421ms step_avg:151.90ms step:1811/3200 train_loss:3.6568 train_time:273572ms step_avg:151.90ms step:1812/3200 train_loss:3.5108 train_time:273722ms step_avg:151.90ms step:1813/3200 train_loss:3.5541 train_time:273873ms step_avg:151.90ms step:1814/3200 train_loss:3.5790 train_time:274023ms step_avg:151.90ms step:1815/3200 train_loss:3.5191 train_time:274172ms step_avg:151.90ms step:1816/3200 train_loss:3.5514 train_time:274322ms step_avg:151.89ms step:1817/3200 train_loss:3.5110 train_time:274473ms step_avg:151.89ms step:1818/3200 train_loss:3.5679 train_time:274623ms step_avg:151.89ms step:1819/3200 train_loss:3.4923 train_time:274773ms step_avg:151.89ms step:1820/3200 train_loss:3.4797 train_time:274923ms step_avg:151.89ms step:1821/3200 train_loss:3.4370 train_time:275073ms step_avg:151.89ms step:1822/3200 train_loss:3.4139 train_time:275222ms step_avg:151.89ms step:1823/3200 
train_loss:3.3435 train_time:275373ms step_avg:151.89ms step:1824/3200 train_loss:3.4948 train_time:275524ms step_avg:151.89ms step:1825/3200 train_loss:3.6130 train_time:275675ms step_avg:151.89ms step:1826/3200 train_loss:3.5653 train_time:275825ms step_avg:151.89ms step:1827/3200 train_loss:3.5525 train_time:275974ms step_avg:151.88ms step:1828/3200 train_loss:3.4147 train_time:276123ms step_avg:151.88ms step:1829/3200 train_loss:3.4349 train_time:276273ms step_avg:151.88ms step:1830/3200 train_loss:3.5757 train_time:276423ms step_avg:151.88ms step:1831/3200 train_loss:3.3498 train_time:276574ms step_avg:151.88ms step:1832/3200 train_loss:3.5091 train_time:276724ms step_avg:151.88ms step:1833/3200 train_loss:3.3812 train_time:276872ms step_avg:151.88ms step:1834/3200 train_loss:3.6979 train_time:277024ms step_avg:151.88ms step:1835/3200 train_loss:3.5365 train_time:277173ms step_avg:151.88ms step:1836/3200 train_loss:3.5195 train_time:277324ms step_avg:151.88ms step:1837/3200 train_loss:3.6409 train_time:277475ms step_avg:151.87ms step:1838/3200 train_loss:3.4998 train_time:277626ms step_avg:151.87ms step:1839/3200 train_loss:3.3874 train_time:277774ms step_avg:151.87ms step:1840/3200 train_loss:3.4997 train_time:277924ms step_avg:151.87ms step:1841/3200 train_loss:3.3922 train_time:278075ms step_avg:151.87ms step:1842/3200 train_loss:3.4986 train_time:278226ms step_avg:151.87ms step:1843/3200 train_loss:3.5554 train_time:278375ms step_avg:151.87ms step:1844/3200 train_loss:3.3011 train_time:278525ms step_avg:151.87ms step:1845/3200 train_loss:3.4281 train_time:278675ms step_avg:151.87ms step:1846/3200 train_loss:3.4889 train_time:278826ms step_avg:151.87ms step:1847/3200 train_loss:3.4269 train_time:278974ms step_avg:151.86ms step:1848/3200 train_loss:3.3289 train_time:279126ms step_avg:151.86ms step:1849/3200 train_loss:3.5967 train_time:279276ms step_avg:151.86ms step:1850/3200 train_loss:3.3634 train_time:279426ms step_avg:151.86ms step:1851/3200 
train_loss:3.4397 train_time:279575ms step_avg:151.86ms step:1852/3200 train_loss:3.4057 train_time:279727ms step_avg:151.86ms step:1853/3200 train_loss:3.5998 train_time:279876ms step_avg:151.86ms step:1854/3200 train_loss:3.5831 train_time:280027ms step_avg:151.86ms step:1855/3200 train_loss:3.4585 train_time:280176ms step_avg:151.86ms step:1856/3200 train_loss:3.4068 train_time:280328ms step_avg:151.86ms step:1857/3200 train_loss:3.4338 train_time:280477ms step_avg:151.86ms step:1858/3200 train_loss:3.6810 train_time:280629ms step_avg:151.86ms step:1859/3200 train_loss:3.5246 train_time:280778ms step_avg:151.85ms step:1860/3200 train_loss:3.4670 train_time:280931ms step_avg:151.85ms step:1861/3200 train_loss:3.5082 train_time:281080ms step_avg:151.85ms step:1862/3200 train_loss:3.3979 train_time:281232ms step_avg:151.85ms step:1863/3200 train_loss:3.3913 train_time:281381ms step_avg:151.85ms step:1864/3200 train_loss:3.4657 train_time:281533ms step_avg:151.85ms step:1865/3200 train_loss:3.5065 train_time:281681ms step_avg:151.85ms step:1866/3200 train_loss:3.2615 train_time:281832ms step_avg:151.85ms step:1867/3200 train_loss:3.3965 train_time:281982ms step_avg:151.85ms step:1868/3200 train_loss:3.3540 train_time:282133ms step_avg:151.85ms step:1869/3200 train_loss:3.3579 train_time:282283ms step_avg:151.85ms step:1870/3200 train_loss:3.5132 train_time:282434ms step_avg:151.85ms step:1871/3200 train_loss:3.4963 train_time:282583ms step_avg:151.84ms step:1872/3200 train_loss:3.4400 train_time:282734ms step_avg:151.84ms step:1873/3200 train_loss:3.4522 train_time:282883ms step_avg:151.84ms step:1874/3200 train_loss:3.3855 train_time:283034ms step_avg:151.84ms step:1875/3200 train_loss:3.4871 train_time:283183ms step_avg:151.84ms step:1875/3200 val_loss:3.4623 train_time:283229ms step_avg:151.87ms step:1876/3200 train_loss:3.4858 train_time:283342ms step_avg:151.84ms step:1877/3200 train_loss:3.4132 train_time:283493ms step_avg:151.84ms step:1878/3200 
train_loss:3.4560 train_time:283644ms step_avg:151.84ms step:1879/3200 train_loss:3.5694 train_time:283791ms step_avg:151.84ms step:1880/3200 train_loss:3.4474 train_time:283940ms step_avg:151.84ms step:1881/3200 train_loss:3.4994 train_time:284088ms step_avg:151.84ms step:1882/3200 train_loss:3.4154 train_time:284239ms step_avg:151.84ms step:1883/3200 train_loss:3.4862 train_time:284392ms step_avg:151.84ms step:1884/3200 train_loss:3.4822 train_time:284543ms step_avg:151.84ms step:1885/3200 train_loss:3.2421 train_time:284692ms step_avg:151.84ms step:1886/3200 train_loss:3.6400 train_time:284844ms step_avg:151.84ms step:1887/3200 train_loss:3.3715 train_time:284992ms step_avg:151.83ms step:1888/3200 train_loss:3.3850 train_time:285143ms step_avg:151.83ms step:1889/3200 train_loss:3.4625 train_time:285293ms step_avg:151.83ms step:1890/3200 train_loss:3.5116 train_time:285602ms step_avg:151.92ms step:1891/3200 train_loss:3.3258 train_time:285760ms step_avg:151.92ms step:1892/3200 train_loss:3.6044 train_time:285909ms step_avg:151.92ms step:1893/3200 train_loss:3.3539 train_time:286058ms step_avg:151.92ms step:1894/3200 train_loss:3.4942 train_time:286206ms step_avg:151.91ms step:1895/3200 train_loss:3.5243 train_time:286355ms step_avg:151.91ms step:1896/3200 train_loss:3.3316 train_time:286506ms step_avg:151.91ms step:1897/3200 train_loss:3.4911 train_time:286658ms step_avg:151.91ms step:1898/3200 train_loss:3.4521 train_time:286807ms step_avg:151.91ms step:1899/3200 train_loss:3.5290 train_time:286956ms step_avg:151.91ms step:1900/3200 train_loss:3.3088 train_time:287278ms step_avg:152.00ms step:1901/3200 train_loss:3.5496 train_time:287437ms step_avg:152.00ms step:1902/3200 train_loss:3.4357 train_time:287586ms step_avg:152.00ms step:1903/3200 train_loss:3.6000 train_time:287736ms step_avg:152.00ms step:1904/3200 train_loss:3.3999 train_time:287885ms step_avg:152.00ms step:1905/3200 train_loss:3.6778 train_time:288033ms step_avg:152.00ms step:1906/3200 
train_loss:3.4133 train_time:288185ms step_avg:152.00ms step:1907/3200 train_loss:3.4064 train_time:288337ms step_avg:152.00ms step:1908/3200 train_loss:3.4851 train_time:288488ms step_avg:152.00ms step:1909/3200 train_loss:3.3613 train_time:288638ms step_avg:151.99ms step:1910/3200 train_loss:3.4337 train_time:288787ms step_avg:151.99ms step:1911/3200 train_loss:3.5270 train_time:288936ms step_avg:151.99ms step:1912/3200 train_loss:3.4570 train_time:289086ms step_avg:151.99ms step:1913/3200 train_loss:3.3328 train_time:289236ms step_avg:151.99ms step:1914/3200 train_loss:3.2017 train_time:289388ms step_avg:151.99ms step:1915/3200 train_loss:3.4056 train_time:289538ms step_avg:151.99ms step:1916/3200 train_loss:3.6115 train_time:289689ms step_avg:151.99ms step:1917/3200 train_loss:3.6201 train_time:289839ms step_avg:151.99ms step:1918/3200 train_loss:3.5660 train_time:289990ms step_avg:151.99ms step:1919/3200 train_loss:3.3928 train_time:290141ms step_avg:151.99ms step:1920/3200 train_loss:3.6437 train_time:290290ms step_avg:151.98ms step:1921/3200 train_loss:3.4613 train_time:290442ms step_avg:151.98ms step:1922/3200 train_loss:3.4036 train_time:290591ms step_avg:151.98ms step:1923/3200 train_loss:3.5752 train_time:290743ms step_avg:151.98ms step:1924/3200 train_loss:3.5384 train_time:290891ms step_avg:151.98ms step:1925/3200 train_loss:3.3783 train_time:291042ms step_avg:151.98ms step:1926/3200 train_loss:3.4075 train_time:291192ms step_avg:151.98ms step:1927/3200 train_loss:3.3181 train_time:291344ms step_avg:151.98ms step:1928/3200 train_loss:3.4304 train_time:291495ms step_avg:151.98ms step:1929/3200 train_loss:3.2830 train_time:291645ms step_avg:151.98ms step:1930/3200 train_loss:3.4099 train_time:291795ms step_avg:151.98ms step:1931/3200 train_loss:3.5329 train_time:291945ms step_avg:151.98ms step:1932/3200 train_loss:3.4056 train_time:292094ms step_avg:151.97ms step:1933/3200 train_loss:3.5510 train_time:292245ms step_avg:151.97ms step:1934/3200 
train_loss:3.4109 train_time:292394ms step_avg:151.97ms step:1935/3200 train_loss:3.4638 train_time:292546ms step_avg:151.97ms step:1936/3200 train_loss:3.4940 train_time:292697ms step_avg:151.97ms step:1937/3200 train_loss:3.4620 train_time:292846ms step_avg:151.97ms step:1938/3200 train_loss:3.4878 train_time:292995ms step_avg:151.97ms step:1939/3200 train_loss:3.4102 train_time:293146ms step_avg:151.97ms step:1940/3200 train_loss:3.5058 train_time:293295ms step_avg:151.97ms step:1941/3200 train_loss:3.5397 train_time:293446ms step_avg:151.97ms step:1942/3200 train_loss:3.3779 train_time:293595ms step_avg:151.96ms step:1943/3200 train_loss:3.4165 train_time:293747ms step_avg:151.96ms step:1944/3200 train_loss:3.4792 train_time:293897ms step_avg:151.96ms step:1945/3200 train_loss:3.3310 train_time:294047ms step_avg:151.96ms step:1946/3200 train_loss:3.5929 train_time:294197ms step_avg:151.96ms step:1947/3200 train_loss:3.4625 train_time:294347ms step_avg:151.96ms step:1948/3200 train_loss:3.4487 train_time:294497ms step_avg:151.96ms step:1949/3200 train_loss:3.4464 train_time:294648ms step_avg:151.96ms step:1950/3200 train_loss:3.3331 train_time:294799ms step_avg:151.96ms step:1951/3200 train_loss:3.4496 train_time:294949ms step_avg:151.96ms step:1952/3200 train_loss:3.2977 train_time:295099ms step_avg:151.96ms step:1953/3200 train_loss:3.5066 train_time:295248ms step_avg:151.95ms step:1954/3200 train_loss:3.4971 train_time:295398ms step_avg:151.95ms step:1955/3200 train_loss:3.4523 train_time:295548ms step_avg:151.95ms step:1956/3200 train_loss:3.3396 train_time:295700ms step_avg:151.95ms step:1957/3200 train_loss:3.4335 train_time:295849ms step_avg:151.95ms step:1958/3200 train_loss:3.6055 train_time:296001ms step_avg:151.95ms step:1959/3200 train_loss:3.5337 train_time:296151ms step_avg:151.95ms step:1960/3200 train_loss:3.5607 train_time:296302ms step_avg:151.95ms step:1961/3200 train_loss:3.3528 train_time:296452ms step_avg:151.95ms step:1962/3200 
train_loss:3.4784 train_time:296604ms step_avg:151.95ms step:1963/3200 train_loss:3.5239 train_time:296754ms step_avg:151.95ms step:1964/3200 train_loss:3.4678 train_time:296906ms step_avg:151.95ms step:1965/3200 train_loss:3.3836 train_time:297055ms step_avg:151.95ms step:1966/3200 train_loss:3.7913 train_time:297206ms step_avg:151.95ms step:1967/3200 train_loss:3.3938 train_time:297356ms step_avg:151.94ms step:1968/3200 train_loss:3.4420 train_time:297506ms step_avg:151.94ms step:1969/3200 train_loss:3.4912 train_time:297655ms step_avg:151.94ms step:1970/3200 train_loss:3.4426 train_time:297807ms step_avg:151.94ms step:1971/3200 train_loss:3.3353 train_time:297956ms step_avg:151.94ms step:1972/3200 train_loss:3.3149 train_time:298107ms step_avg:151.94ms step:1973/3200 train_loss:3.4384 train_time:298258ms step_avg:151.94ms step:1974/3200 train_loss:3.4054 train_time:298407ms step_avg:151.94ms step:1975/3200 train_loss:3.3875 train_time:298558ms step_avg:151.94ms step:1976/3200 train_loss:3.5398 train_time:298708ms step_avg:151.94ms step:1977/3200 train_loss:3.4103 train_time:298858ms step_avg:151.94ms step:1978/3200 train_loss:3.7737 train_time:299008ms step_avg:151.93ms step:1979/3200 train_loss:3.4567 train_time:299159ms step_avg:151.93ms step:1980/3200 train_loss:3.4577 train_time:299308ms step_avg:151.93ms step:1981/3200 train_loss:3.4639 train_time:299458ms step_avg:151.93ms step:1982/3200 train_loss:3.4874 train_time:299608ms step_avg:151.93ms step:1983/3200 train_loss:3.4202 train_time:299758ms step_avg:151.93ms step:1984/3200 train_loss:3.3802 train_time:299908ms step_avg:151.93ms step:1985/3200 train_loss:3.4415 train_time:300059ms step_avg:151.93ms step:1986/3200 train_loss:3.5015 train_time:300209ms step_avg:151.93ms step:1987/3200 train_loss:3.4711 train_time:300361ms step_avg:151.93ms step:1988/3200 train_loss:3.4463 train_time:300509ms step_avg:151.93ms step:1989/3200 train_loss:3.5256 train_time:300662ms step_avg:151.93ms step:1990/3200 
train_loss:3.5583 train_time:300811ms step_avg:151.92ms step:1991/3200 train_loss:3.3436 train_time:300963ms step_avg:151.92ms step:1992/3200 train_loss:3.3348 train_time:301111ms step_avg:151.92ms step:1993/3200 train_loss:3.5157 train_time:301265ms step_avg:151.92ms step:1994/3200 train_loss:3.3438 train_time:301414ms step_avg:151.92ms step:1995/3200 train_loss:3.4236 train_time:301566ms step_avg:151.92ms step:1996/3200 train_loss:3.5035 train_time:301715ms step_avg:151.92ms step:1997/3200 train_loss:3.3692 train_time:301867ms step_avg:151.92ms step:1998/3200 train_loss:3.4676 train_time:302015ms step_avg:151.92ms step:1999/3200 train_loss:3.4699 train_time:302166ms step_avg:151.92ms step:2000/3200 train_loss:3.3940 train_time:302315ms step_avg:151.92ms step:2000/3200 val_loss:3.4475 train_time:302362ms step_avg:151.94ms step:2001/3200 train_loss:3.5413 train_time:302473ms step_avg:151.92ms step:2002/3200 train_loss:3.4783 train_time:302626ms step_avg:151.92ms step:2003/3200 train_loss:3.5709 train_time:302776ms step_avg:151.92ms step:2004/3200 train_loss:3.4845 train_time:302923ms step_avg:151.92ms step:2005/3200 train_loss:3.5002 train_time:303073ms step_avg:151.92ms step:2006/3200 train_loss:3.3848 train_time:303221ms step_avg:151.91ms step:2007/3200 train_loss:3.4139 train_time:303371ms step_avg:151.91ms step:2008/3200 train_loss:3.4663 train_time:303525ms step_avg:151.91ms step:2009/3200 train_loss:3.4990 train_time:303677ms step_avg:151.91ms step:2010/3200 train_loss:3.3997 train_time:303825ms step_avg:151.91ms step:2011/3200 train_loss:3.4810 train_time:303976ms step_avg:151.91ms step:2012/3200 train_loss:3.4549 train_time:304124ms step_avg:151.91ms step:2013/3200 train_loss:3.4587 train_time:304274ms step_avg:151.91ms step:2014/3200 train_loss:3.3781 train_time:304424ms step_avg:151.91ms step:2015/3200 train_loss:3.4193 train_time:304576ms step_avg:151.91ms step:2016/3200 train_loss:3.4466 train_time:304727ms step_avg:151.91ms step:2017/3200 
train_loss:3.5701 train_time:304878ms step_avg:151.91ms step:2018/3200 train_loss:3.4293 train_time:305026ms step_avg:151.91ms step:2019/3200 train_loss:3.5673 train_time:305177ms step_avg:151.90ms step:2020/3200 train_loss:3.5871 train_time:305325ms step_avg:151.90ms step:2021/3200 train_loss:3.2926 train_time:305478ms step_avg:151.90ms step:2022/3200 train_loss:3.5188 train_time:305628ms step_avg:151.90ms step:2023/3200 train_loss:3.4506 train_time:305779ms step_avg:151.90ms step:2024/3200 train_loss:3.5468 train_time:305930ms step_avg:151.90ms step:2025/3200 train_loss:3.5858 train_time:306080ms step_avg:151.90ms step:2026/3200 train_loss:3.3755 train_time:306229ms step_avg:151.90ms step:2027/3200 train_loss:3.4052 train_time:306379ms step_avg:151.90ms step:2028/3200 train_loss:3.3176 train_time:306530ms step_avg:151.90ms step:2029/3200 train_loss:3.4304 train_time:306681ms step_avg:151.90ms step:2030/3200 train_loss:3.3507 train_time:306830ms step_avg:151.90ms step:2031/3200 train_loss:3.4404 train_time:306979ms step_avg:151.89ms step:2032/3200 train_loss:3.4352 train_time:307129ms step_avg:151.89ms step:2033/3200 train_loss:3.4511 train_time:307279ms step_avg:151.89ms step:2034/3200 train_loss:3.3448 train_time:307428ms step_avg:151.89ms step:2035/3200 train_loss:3.5068 train_time:307579ms step_avg:151.89ms step:2036/3200 train_loss:3.5093 train_time:307730ms step_avg:151.89ms step:2037/3200 train_loss:3.4953 train_time:307880ms step_avg:151.89ms step:2038/3200 train_loss:3.3708 train_time:308030ms step_avg:151.89ms step:2039/3200 train_loss:3.6244 train_time:308180ms step_avg:151.89ms step:2040/3200 train_loss:3.4602 train_time:308330ms step_avg:151.89ms step:2041/3200 train_loss:3.4747 train_time:308481ms step_avg:151.89ms step:2042/3200 train_loss:3.4308 train_time:308632ms step_avg:151.89ms step:2043/3200 train_loss:3.3297 train_time:308782ms step_avg:151.89ms step:2044/3200 train_loss:3.4515 train_time:308933ms step_avg:151.88ms step:2045/3200 
train_loss:3.4430 train_time:309083ms step_avg:151.88ms step:2046/3200 train_loss:3.3163 train_time:309233ms step_avg:151.88ms step:2047/3200 train_loss:3.3921 train_time:309382ms step_avg:151.88ms step:2048/3200 train_loss:3.4697 train_time:309533ms step_avg:151.88ms step:2049/3200 train_loss:3.4178 train_time:309682ms step_avg:151.88ms step:2050/3200 train_loss:3.4708 train_time:309833ms step_avg:151.88ms step:2051/3200 train_loss:3.6085 train_time:309983ms step_avg:151.88ms step:2052/3200 train_loss:3.4766 train_time:310136ms step_avg:151.88ms step:2053/3200 train_loss:3.4255 train_time:310285ms step_avg:151.88ms step:2054/3200 train_loss:3.4067 train_time:310437ms step_avg:151.88ms step:2055/3200 train_loss:3.2752 train_time:310586ms step_avg:151.88ms step:2056/3200 train_loss:3.3877 train_time:310738ms step_avg:151.88ms step:2057/3200 train_loss:3.5603 train_time:310888ms step_avg:151.87ms step:2058/3200 train_loss:3.5864 train_time:311038ms step_avg:151.87ms step:2059/3200 train_loss:3.4465 train_time:311188ms step_avg:151.87ms step:2060/3200 train_loss:3.4833 train_time:311339ms step_avg:151.87ms step:2061/3200 train_loss:3.4716 train_time:311489ms step_avg:151.87ms step:2062/3200 train_loss:3.4247 train_time:311639ms step_avg:151.87ms step:2063/3200 train_loss:3.3406 train_time:311790ms step_avg:151.87ms step:2064/3200 train_loss:3.6420 train_time:311940ms step_avg:151.87ms step:2065/3200 train_loss:3.5039 train_time:312091ms step_avg:151.87ms step:2066/3200 train_loss:3.4558 train_time:312240ms step_avg:151.87ms step:2067/3200 train_loss:3.4996 train_time:312390ms step_avg:151.87ms step:2068/3200 train_loss:3.4038 train_time:312540ms step_avg:151.87ms step:2069/3200 train_loss:3.4590 train_time:312690ms step_avg:151.86ms step:2070/3200 train_loss:3.5868 train_time:312840ms step_avg:151.86ms step:2071/3200 train_loss:3.5929 train_time:312991ms step_avg:151.86ms step:2072/3200 train_loss:3.4397 train_time:313141ms step_avg:151.86ms step:2073/3200 
train_loss:3.4815 train_time:313291ms step_avg:151.86ms step:2074/3200 train_loss:3.3663 train_time:313440ms step_avg:151.86ms step:2075/3200 train_loss:3.8926 train_time:313591ms step_avg:151.86ms step:2076/3200 train_loss:3.3199 train_time:313742ms step_avg:151.86ms step:2077/3200 train_loss:3.4872 train_time:313892ms step_avg:151.86ms step:2078/3200 train_loss:3.3749 train_time:314042ms step_avg:151.86ms step:2079/3200 train_loss:3.3564 train_time:314356ms step_avg:151.94ms step:2080/3200 train_loss:3.4451 train_time:314509ms step_avg:151.94ms step:2081/3200 train_loss:3.6953 train_time:314659ms step_avg:151.94ms step:2082/3200 train_loss:3.3245 train_time:314807ms step_avg:151.93ms step:2083/3200 train_loss:3.6625 train_time:314956ms step_avg:151.93ms step:2084/3200 train_loss:3.3679 train_time:315104ms step_avg:151.93ms step:2085/3200 train_loss:3.3586 train_time:315254ms step_avg:151.93ms step:2086/3200 train_loss:3.5981 train_time:315405ms step_avg:151.93ms step:2087/3200 train_loss:3.5250 train_time:315557ms step_avg:151.93ms step:2088/3200 train_loss:3.5105 train_time:315706ms step_avg:151.93ms step:2089/3200 train_loss:3.5693 train_time:315857ms step_avg:151.93ms step:2090/3200 train_loss:3.4978 train_time:316175ms step_avg:152.01ms step:2091/3200 train_loss:3.4801 train_time:316325ms step_avg:152.01ms step:2092/3200 train_loss:3.4355 train_time:316474ms step_avg:152.00ms step:2093/3200 train_loss:3.4994 train_time:316623ms step_avg:152.00ms step:2094/3200 train_loss:3.4146 train_time:316771ms step_avg:152.00ms step:2095/3200 train_loss:3.2050 train_time:316920ms step_avg:152.00ms step:2096/3200 train_loss:3.4317 train_time:317070ms step_avg:152.00ms step:2097/3200 train_loss:3.5976 train_time:317225ms step_avg:152.00ms step:2098/3200 train_loss:3.4227 train_time:317376ms step_avg:152.00ms step:2099/3200 train_loss:3.3221 train_time:317525ms step_avg:152.00ms step:2100/3200 train_loss:3.4235 train_time:317675ms step_avg:152.00ms step:2101/3200 
train_loss:3.3740 train_time:317823ms step_avg:152.00ms step:2102/3200 train_loss:3.5153 train_time:317974ms step_avg:152.00ms step:2103/3200 train_loss:3.3564 train_time:318124ms step_avg:151.99ms step:2104/3200 train_loss:3.3197 train_time:318277ms step_avg:151.99ms step:2105/3200 train_loss:3.5774 train_time:318428ms step_avg:151.99ms step:2106/3200 train_loss:3.3063 train_time:318578ms step_avg:151.99ms step:2107/3200 train_loss:3.7092 train_time:318726ms step_avg:151.99ms step:2108/3200 train_loss:3.5413 train_time:318876ms step_avg:151.99ms step:2109/3200 train_loss:3.4471 train_time:319026ms step_avg:151.99ms step:2110/3200 train_loss:3.4718 train_time:319178ms step_avg:151.99ms step:2111/3200 train_loss:3.2851 train_time:319329ms step_avg:151.99ms step:2112/3200 train_loss:3.7669 train_time:319480ms step_avg:151.99ms step:2113/3200 train_loss:3.4607 train_time:319629ms step_avg:151.99ms step:2114/3200 train_loss:3.3946 train_time:319779ms step_avg:151.99ms step:2115/3200 train_loss:3.5065 train_time:319927ms step_avg:151.98ms step:2116/3200 train_loss:3.4621 train_time:320079ms step_avg:151.98ms step:2117/3200 train_loss:3.4521 train_time:320229ms step_avg:151.98ms step:2118/3200 train_loss:3.5098 train_time:320380ms step_avg:151.98ms step:2119/3200 train_loss:3.3633 train_time:320533ms step_avg:151.98ms step:2120/3200 train_loss:3.4308 train_time:320684ms step_avg:151.98ms step:2121/3200 train_loss:3.1291 train_time:320834ms step_avg:151.98ms step:2122/3200 train_loss:3.3304 train_time:320983ms step_avg:151.98ms step:2123/3200 train_loss:3.4927 train_time:321135ms step_avg:151.98ms step:2124/3200 train_loss:3.4045 train_time:321285ms step_avg:151.98ms step:2125/3200 train_loss:3.5709 train_time:321437ms step_avg:151.98ms step:2125/3200 val_loss:3.4367 train_time:321483ms step_avg:152.00ms step:2126/3200 train_loss:3.4222 train_time:321595ms step_avg:151.98ms step:2127/3200 train_loss:3.5331 train_time:321748ms step_avg:151.98ms step:2128/3200 
train_loss:3.5152 train_time:321898ms step_avg:151.98ms step:2129/3200 train_loss:3.3930 train_time:322047ms step_avg:151.98ms step:2130/3200 train_loss:3.3652 train_time:322196ms step_avg:151.98ms step:2131/3200 train_loss:3.3882 train_time:322346ms step_avg:151.98ms step:2132/3200 train_loss:3.5450 train_time:322494ms step_avg:151.98ms step:2133/3200 train_loss:3.4208 train_time:322648ms step_avg:151.98ms step:2134/3200 train_loss:3.3319 train_time:322798ms step_avg:151.98ms step:2135/3200 train_loss:3.3873 train_time:322949ms step_avg:151.98ms step:2136/3200 train_loss:3.5155 train_time:323099ms step_avg:151.98ms step:2137/3200 train_loss:3.5320 train_time:323249ms step_avg:151.97ms step:2138/3200 train_loss:3.4724 train_time:323397ms step_avg:151.97ms step:2139/3200 train_loss:3.4664 train_time:323548ms step_avg:151.97ms step:2140/3200 train_loss:3.4493 train_time:323699ms step_avg:151.97ms step:2141/3200 train_loss:3.5349 train_time:323850ms step_avg:151.97ms step:2142/3200 train_loss:3.8262 train_time:323999ms step_avg:151.97ms step:2143/3200 train_loss:3.3653 train_time:324150ms step_avg:151.97ms step:2144/3200 train_loss:3.3977 train_time:324299ms step_avg:151.97ms step:2145/3200 train_loss:3.4412 train_time:324449ms step_avg:151.97ms step:2146/3200 train_loss:3.5681 train_time:324599ms step_avg:151.97ms step:2147/3200 train_loss:3.4916 train_time:324750ms step_avg:151.97ms step:2148/3200 train_loss:3.9072 train_time:324900ms step_avg:151.96ms step:2149/3200 train_loss:3.4278 train_time:325052ms step_avg:151.96ms step:2150/3200 train_loss:3.3894 train_time:325203ms step_avg:151.96ms step:2151/3200 train_loss:3.4656 train_time:325353ms step_avg:151.96ms step:2152/3200 train_loss:3.4887 train_time:325504ms step_avg:151.96ms step:2153/3200 train_loss:3.4399 train_time:325653ms step_avg:151.96ms step:2154/3200 train_loss:3.3789 train_time:325805ms step_avg:151.96ms step:2155/3200 train_loss:3.5946 train_time:325954ms step_avg:151.96ms step:2156/3200 
train_loss:3.2118 train_time:326106ms step_avg:151.96ms step:2157/3200 train_loss:3.3761 train_time:326256ms step_avg:151.96ms step:2158/3200 train_loss:3.5124 train_time:326407ms step_avg:151.96ms step:2159/3200 train_loss:3.4481 train_time:326556ms step_avg:151.96ms step:2160/3200 train_loss:3.6102 train_time:326707ms step_avg:151.96ms step:2161/3200 train_loss:3.5123 train_time:326856ms step_avg:151.96ms step:2162/3200 train_loss:3.4439 train_time:327007ms step_avg:151.95ms step:2163/3200 train_loss:3.4198 train_time:327156ms step_avg:151.95ms step:2164/3200 train_loss:3.4121 train_time:327307ms step_avg:151.95ms step:2165/3200 train_loss:3.4977 train_time:327456ms step_avg:151.95ms step:2166/3200 train_loss:3.5175 train_time:327607ms step_avg:151.95ms step:2167/3200 train_loss:3.4461 train_time:327756ms step_avg:151.95ms step:2168/3200 train_loss:3.3490 train_time:327907ms step_avg:151.95ms step:2169/3200 train_loss:3.4350 train_time:328056ms step_avg:151.95ms step:2170/3200 train_loss:3.4693 train_time:328208ms step_avg:151.95ms step:2171/3200 train_loss:3.5930 train_time:328357ms step_avg:151.95ms step:2172/3200 train_loss:3.3873 train_time:328507ms step_avg:151.95ms step:2173/3200 train_loss:3.3759 train_time:328656ms step_avg:151.94ms step:2174/3200 train_loss:3.3888 train_time:328807ms step_avg:151.94ms step:2175/3200 train_loss:3.4381 train_time:328956ms step_avg:151.94ms step:2176/3200 train_loss:3.4016 train_time:329108ms step_avg:151.94ms step:2177/3200 train_loss:3.3772 train_time:329258ms step_avg:151.94ms step:2178/3200 train_loss:3.5902 train_time:329410ms step_avg:151.94ms step:2179/3200 train_loss:3.4171 train_time:329559ms step_avg:151.94ms step:2180/3200 train_loss:3.4348 train_time:329710ms step_avg:151.94ms step:2181/3200 train_loss:3.4855 train_time:329859ms step_avg:151.94ms step:2182/3200 train_loss:3.4621 train_time:330009ms step_avg:151.94ms step:2183/3200 train_loss:3.4252 train_time:330159ms step_avg:151.94ms step:2184/3200 
train_loss:3.3245 train_time:330310ms step_avg:151.94ms step:2185/3200 train_loss:3.5064 train_time:330460ms step_avg:151.94ms step:2186/3200 train_loss:3.6679 train_time:330610ms step_avg:151.93ms step:2187/3200 train_loss:3.3126 train_time:330761ms step_avg:151.93ms step:2188/3200 train_loss:3.3537 train_time:330911ms step_avg:151.93ms step:2189/3200 train_loss:3.2034 train_time:331061ms step_avg:151.93ms step:2190/3200 train_loss:3.3583 train_time:331210ms step_avg:151.93ms step:2191/3200 train_loss:3.5017 train_time:331361ms step_avg:151.93ms step:2192/3200 train_loss:3.4376 train_time:331510ms step_avg:151.93ms step:2193/3200 train_loss:3.6740 train_time:331661ms step_avg:151.93ms step:2194/3200 train_loss:3.4411 train_time:331810ms step_avg:151.93ms step:2195/3200 train_loss:3.5005 train_time:331961ms step_avg:151.93ms step:2196/3200 train_loss:3.4462 train_time:332112ms step_avg:151.93ms step:2197/3200 train_loss:3.3657 train_time:332263ms step_avg:151.93ms step:2198/3200 train_loss:3.4422 train_time:332412ms step_avg:151.93ms step:2199/3200 train_loss:3.3853 train_time:332563ms step_avg:151.92ms step:2200/3200 train_loss:3.3893 train_time:332713ms step_avg:151.92ms step:2201/3200 train_loss:3.4379 train_time:332864ms step_avg:151.92ms step:2202/3200 train_loss:3.4233 train_time:333012ms step_avg:151.92ms step:2203/3200 train_loss:3.4073 train_time:333163ms step_avg:151.92ms step:2204/3200 train_loss:3.9067 train_time:333313ms step_avg:151.92ms step:2205/3200 train_loss:3.3186 train_time:333464ms step_avg:151.92ms step:2206/3200 train_loss:3.4362 train_time:333615ms step_avg:151.92ms step:2207/3200 train_loss:3.4573 train_time:333767ms step_avg:151.92ms step:2208/3200 train_loss:3.4709 train_time:333917ms step_avg:151.92ms step:2209/3200 train_loss:3.3731 train_time:334068ms step_avg:151.92ms step:2210/3200 train_loss:3.4444 train_time:334219ms step_avg:151.92ms step:2211/3200 train_loss:3.4534 train_time:334369ms step_avg:151.92ms step:2212/3200 
train_loss:3.4496 train_time:334519ms step_avg:151.92ms step:2213/3200 train_loss:3.4771 train_time:334670ms step_avg:151.92ms step:2214/3200 train_loss:3.3310 train_time:334821ms step_avg:151.91ms step:2215/3200 train_loss:3.3948 train_time:334970ms step_avg:151.91ms step:2216/3200 train_loss:3.5342 train_time:335121ms step_avg:151.91ms step:2217/3200 train_loss:3.4856 train_time:335270ms step_avg:151.91ms step:2218/3200 train_loss:3.4527 train_time:335422ms step_avg:151.91ms step:2219/3200 train_loss:3.4556 train_time:335571ms step_avg:151.91ms step:2220/3200 train_loss:3.3694 train_time:335723ms step_avg:151.91ms step:2221/3200 train_loss:3.6224 train_time:335873ms step_avg:151.91ms step:2222/3200 train_loss:3.5076 train_time:336025ms step_avg:151.91ms step:2223/3200 train_loss:3.5295 train_time:336173ms step_avg:151.91ms step:2224/3200 train_loss:3.4080 train_time:336325ms step_avg:151.91ms step:2225/3200 train_loss:3.5422 train_time:336473ms step_avg:151.91ms step:2226/3200 train_loss:3.2990 train_time:336626ms step_avg:151.91ms step:2227/3200 train_loss:3.5655 train_time:336775ms step_avg:151.91ms step:2228/3200 train_loss:3.5038 train_time:336927ms step_avg:151.91ms step:2229/3200 train_loss:3.3121 train_time:337076ms step_avg:151.90ms step:2230/3200 train_loss:3.6554 train_time:337228ms step_avg:151.90ms step:2231/3200 train_loss:3.3442 train_time:337376ms step_avg:151.90ms step:2232/3200 train_loss:3.8127 train_time:337528ms step_avg:151.90ms step:2233/3200 train_loss:3.4894 train_time:337678ms step_avg:151.90ms step:2234/3200 train_loss:3.4380 train_time:337830ms step_avg:151.90ms step:2235/3200 train_loss:3.4712 train_time:337979ms step_avg:151.90ms step:2236/3200 train_loss:3.2550 train_time:338130ms step_avg:151.90ms step:2237/3200 train_loss:3.2545 train_time:338280ms step_avg:151.90ms step:2238/3200 train_loss:3.4769 train_time:338432ms step_avg:151.90ms step:2239/3200 train_loss:3.5756 train_time:338582ms step_avg:151.90ms step:2240/3200 
train_loss:3.2930 train_time:338733ms step_avg:151.90ms step:2241/3200 train_loss:3.3577 train_time:338883ms step_avg:151.90ms step:2242/3200 train_loss:3.5515 train_time:339034ms step_avg:151.90ms step:2243/3200 train_loss:3.5145 train_time:339185ms step_avg:151.90ms step:2244/3200 train_loss:3.3701 train_time:339334ms step_avg:151.90ms step:2245/3200 train_loss:3.4453 train_time:339486ms step_avg:151.90ms step:2246/3200 train_loss:3.4663 train_time:339634ms step_avg:151.89ms step:2247/3200 train_loss:3.2946 train_time:339787ms step_avg:151.89ms step:2248/3200 train_loss:3.3175 train_time:339936ms step_avg:151.89ms step:2249/3200 train_loss:3.5803 train_time:340088ms step_avg:151.89ms step:2250/3200 train_loss:3.3001 train_time:340237ms step_avg:151.89ms step:2250/3200 val_loss:3.4268 train_time:340284ms step_avg:151.91ms step:2251/3200 train_loss:3.3046 train_time:340397ms step_avg:151.90ms step:2252/3200 train_loss:3.3809 train_time:340549ms step_avg:151.90ms step:2253/3200 train_loss:3.3496 train_time:340698ms step_avg:151.89ms step:2254/3200 train_loss:3.4026 train_time:340845ms step_avg:151.89ms step:2255/3200 train_loss:3.4588 train_time:340996ms step_avg:151.89ms step:2256/3200 train_loss:3.3316 train_time:341143ms step_avg:151.89ms step:2257/3200 train_loss:3.6162 train_time:341296ms step_avg:151.89ms step:2258/3200 train_loss:3.4913 train_time:341448ms step_avg:151.89ms step:2259/3200 train_loss:3.8118 train_time:341600ms step_avg:151.89ms step:2260/3200 train_loss:3.4930 train_time:341750ms step_avg:151.89ms step:2261/3200 train_loss:3.5453 train_time:341900ms step_avg:151.89ms step:2262/3200 train_loss:3.4566 train_time:342050ms step_avg:151.89ms step:2263/3200 train_loss:3.4566 train_time:342198ms step_avg:151.89ms step:2264/3200 train_loss:3.2200 train_time:342348ms step_avg:151.88ms step:2265/3200 train_loss:3.3424 train_time:342500ms step_avg:151.88ms step:2266/3200 train_loss:3.5568 train_time:342650ms step_avg:151.88ms step:2267/3200 
train_loss:3.2910 train_time:342799ms step_avg:151.88ms step:2268/3200 train_loss:3.3657 train_time:343107ms step_avg:151.95ms step:2269/3200 train_loss:3.3465 train_time:343265ms step_avg:151.95ms step:2270/3200 train_loss:3.3093 train_time:343415ms step_avg:151.95ms step:2271/3200 train_loss:3.7043 train_time:343563ms step_avg:151.95ms step:2272/3200 train_loss:3.3616 train_time:343711ms step_avg:151.95ms step:2273/3200 train_loss:3.3686 train_time:343860ms step_avg:151.95ms step:2274/3200 train_loss:3.4545 train_time:344010ms step_avg:151.95ms step:2275/3200 train_loss:3.4000 train_time:344164ms step_avg:151.95ms step:2276/3200 train_loss:3.4157 train_time:344316ms step_avg:151.95ms step:2277/3200 train_loss:3.2979 train_time:344464ms step_avg:151.95ms step:2278/3200 train_loss:3.4024 train_time:344614ms step_avg:151.95ms step:2279/3200 train_loss:3.5285 train_time:344762ms step_avg:151.94ms step:2280/3200 train_loss:3.3284 train_time:345079ms step_avg:152.02ms step:2281/3200 train_loss:3.3862 train_time:345230ms step_avg:152.02ms step:2282/3200 train_loss:3.4027 train_time:345380ms step_avg:152.02ms step:2283/3200 train_loss:3.5405 train_time:345527ms step_avg:152.01ms step:2284/3200 train_loss:3.4184 train_time:345677ms step_avg:152.01ms step:2285/3200 train_loss:3.4389 train_time:345825ms step_avg:152.01ms step:2286/3200 train_loss:3.4378 train_time:345976ms step_avg:152.01ms step:2287/3200 train_loss:3.4358 train_time:346129ms step_avg:152.01ms step:2288/3200 train_loss:3.3945 train_time:346281ms step_avg:152.01ms step:2289/3200 train_loss:3.5240 train_time:346431ms step_avg:152.01ms step:2290/3200 train_loss:3.4923 train_time:346581ms step_avg:152.01ms step:2291/3200 train_loss:3.3779 train_time:346730ms step_avg:152.01ms step:2292/3200 train_loss:3.7178 train_time:346880ms step_avg:152.01ms step:2293/3200 train_loss:3.3819 train_time:347030ms step_avg:152.01ms step:2294/3200 train_loss:3.3249 train_time:347182ms step_avg:152.01ms step:2295/3200 
train_loss:3.5063 train_time:347334ms step_avg:152.01ms step:2296/3200 train_loss:3.4577 train_time:347483ms step_avg:152.00ms step:2297/3200 train_loss:3.4636 train_time:347634ms step_avg:152.00ms step:2298/3200 train_loss:3.8082 train_time:347783ms step_avg:152.00ms step:2299/3200 train_loss:3.3284 train_time:347935ms step_avg:152.00ms step:2300/3200 train_loss:3.3288 train_time:348083ms step_avg:152.00ms step:2301/3200 train_loss:3.6630 train_time:348236ms step_avg:152.00ms step:2302/3200 train_loss:3.3872 train_time:348385ms step_avg:152.00ms step:2303/3200 train_loss:3.4059 train_time:348536ms step_avg:152.00ms step:2304/3200 train_loss:3.3907 train_time:348685ms step_avg:152.00ms step:2305/3200 train_loss:3.3284 train_time:348837ms step_avg:152.00ms step:2306/3200 train_loss:3.4861 train_time:348986ms step_avg:152.00ms step:2307/3200 train_loss:3.3568 train_time:349136ms step_avg:152.00ms step:2308/3200 train_loss:3.3715 train_time:349285ms step_avg:152.00ms step:2309/3200 train_loss:3.4955 train_time:349437ms step_avg:152.00ms step:2310/3200 train_loss:3.4612 train_time:349586ms step_avg:151.99ms step:2311/3200 train_loss:3.3300 train_time:349737ms step_avg:151.99ms step:2312/3200 train_loss:3.4492 train_time:349887ms step_avg:151.99ms step:2313/3200 train_loss:3.5677 train_time:350037ms step_avg:151.99ms step:2314/3200 train_loss:3.3866 train_time:350187ms step_avg:151.99ms step:2315/3200 train_loss:3.3111 train_time:350338ms step_avg:151.99ms step:2316/3200 train_loss:3.3982 train_time:350488ms step_avg:151.99ms step:2317/3200 train_loss:3.2819 train_time:350639ms step_avg:151.99ms step:2318/3200 train_loss:3.3880 train_time:350788ms step_avg:151.99ms step:2319/3200 train_loss:3.4094 train_time:350938ms step_avg:151.99ms step:2320/3200 train_loss:3.2599 train_time:351088ms step_avg:151.99ms step:2321/3200 train_loss:3.3944 train_time:351238ms step_avg:151.99ms step:2322/3200 train_loss:3.4408 train_time:351387ms step_avg:151.98ms step:2323/3200 
train_loss:3.3632 train_time:351538ms step_avg:151.98ms step:2324/3200 train_loss:3.4089 train_time:351688ms step_avg:151.98ms step:2325/3200 train_loss:3.3235 train_time:351838ms step_avg:151.98ms step:2326/3200 train_loss:3.4671 train_time:351988ms step_avg:151.98ms step:2327/3200 train_loss:3.4733 train_time:352138ms step_avg:151.98ms step:2328/3200 train_loss:3.2522 train_time:352287ms step_avg:151.98ms step:2329/3200 train_loss:3.3602 train_time:352437ms step_avg:151.98ms step:2330/3200 train_loss:3.3863 train_time:352586ms step_avg:151.98ms step:2331/3200 train_loss:3.3609 train_time:352738ms step_avg:151.98ms step:2332/3200 train_loss:3.5443 train_time:352888ms step_avg:151.98ms step:2333/3200 train_loss:3.4192 train_time:353039ms step_avg:151.98ms step:2334/3200 train_loss:3.4009 train_time:353189ms step_avg:151.97ms step:2335/3200 train_loss:3.4751 train_time:353339ms step_avg:151.97ms step:2336/3200 train_loss:3.3203 train_time:353489ms step_avg:151.97ms step:2337/3200 train_loss:3.4740 train_time:353638ms step_avg:151.97ms step:2338/3200 train_loss:3.4357 train_time:353788ms step_avg:151.97ms step:2339/3200 train_loss:3.3761 train_time:353938ms step_avg:151.97ms step:2340/3200 train_loss:3.4575 train_time:354088ms step_avg:151.97ms step:2341/3200 train_loss:3.5037 train_time:354239ms step_avg:151.97ms step:2342/3200 train_loss:3.3697 train_time:354388ms step_avg:151.97ms step:2343/3200 train_loss:3.3877 train_time:354539ms step_avg:151.97ms step:2344/3200 train_loss:3.4502 train_time:354689ms step_avg:151.97ms step:2345/3200 train_loss:3.3904 train_time:354840ms step_avg:151.97ms step:2346/3200 train_loss:3.5076 train_time:354988ms step_avg:151.96ms step:2347/3200 train_loss:3.4107 train_time:355140ms step_avg:151.96ms step:2348/3200 train_loss:3.5256 train_time:355290ms step_avg:151.96ms step:2349/3200 train_loss:3.4875 train_time:355440ms step_avg:151.96ms step:2350/3200 train_loss:3.5208 train_time:355590ms step_avg:151.96ms step:2351/3200 
train_loss:3.2208 train_time:355740ms step_avg:151.96ms step:2352/3200 train_loss:3.3420 train_time:355890ms step_avg:151.96ms step:2353/3200 train_loss:3.3323 train_time:356040ms step_avg:151.96ms step:2354/3200 train_loss:3.5496 train_time:356191ms step_avg:151.96ms step:2355/3200 train_loss:3.3470 train_time:356340ms step_avg:151.96ms step:2356/3200 train_loss:3.3354 train_time:356490ms step_avg:151.96ms step:2357/3200 train_loss:3.4919 train_time:356640ms step_avg:151.96ms step:2358/3200 train_loss:3.3469 train_time:356790ms step_avg:151.95ms step:2359/3200 train_loss:3.4449 train_time:356940ms step_avg:151.95ms step:2360/3200 train_loss:3.3488 train_time:357091ms step_avg:151.95ms step:2361/3200 train_loss:3.3577 train_time:357242ms step_avg:151.95ms step:2362/3200 train_loss:3.3908 train_time:357394ms step_avg:151.95ms step:2363/3200 train_loss:3.4471 train_time:357543ms step_avg:151.95ms step:2364/3200 train_loss:3.3984 train_time:357693ms step_avg:151.95ms step:2365/3200 train_loss:3.8323 train_time:357843ms step_avg:151.95ms step:2366/3200 train_loss:3.4631 train_time:357996ms step_avg:151.95ms step:2367/3200 train_loss:3.6043 train_time:358144ms step_avg:151.95ms step:2368/3200 train_loss:3.4194 train_time:358295ms step_avg:151.95ms step:2369/3200 train_loss:3.4273 train_time:358444ms step_avg:151.95ms step:2370/3200 train_loss:3.4639 train_time:358596ms step_avg:151.95ms step:2371/3200 train_loss:3.3455 train_time:358744ms step_avg:151.95ms step:2372/3200 train_loss:3.5816 train_time:358895ms step_avg:151.95ms step:2373/3200 train_loss:3.4195 train_time:359046ms step_avg:151.94ms step:2374/3200 train_loss:3.9800 train_time:359198ms step_avg:151.95ms step:2375/3200 train_loss:3.4100 train_time:359349ms step_avg:151.94ms step:2375/3200 val_loss:3.4104 train_time:359395ms step_avg:151.96ms step:2376/3200 train_loss:3.3039 train_time:359508ms step_avg:151.95ms step:2377/3200 train_loss:3.4655 train_time:359660ms step_avg:151.95ms step:2378/3200 
train_loss:3.4402 train_time:359807ms step_avg:151.95ms step:2379/3200 train_loss:3.4568 train_time:359956ms step_avg:151.94ms step:2380/3200 train_loss:3.4331 train_time:360105ms step_avg:151.94ms step:2381/3200 train_loss:3.3383 train_time:360252ms step_avg:151.94ms step:2382/3200 train_loss:3.4390 train_time:360404ms step_avg:151.94ms step:2383/3200 train_loss:3.4532 train_time:360558ms step_avg:151.94ms step:2384/3200 train_loss:3.3955 train_time:360708ms step_avg:151.94ms step:2385/3200 train_loss:3.3291 train_time:360859ms step_avg:151.94ms step:2386/3200 train_loss:3.4411 train_time:361007ms step_avg:151.94ms step:2387/3200 train_loss:3.3970 train_time:361157ms step_avg:151.94ms step:2388/3200 train_loss:3.4003 train_time:361306ms step_avg:151.94ms step:2389/3200 train_loss:3.4296 train_time:361458ms step_avg:151.94ms step:2390/3200 train_loss:3.4141 train_time:361610ms step_avg:151.94ms step:2391/3200 train_loss:3.4152 train_time:361762ms step_avg:151.94ms step:2392/3200 train_loss:3.2964 train_time:361911ms step_avg:151.94ms step:2393/3200 train_loss:3.5174 train_time:362062ms step_avg:151.94ms step:2394/3200 train_loss:3.3446 train_time:362209ms step_avg:151.93ms step:2395/3200 train_loss:3.4520 train_time:362361ms step_avg:151.93ms step:2396/3200 train_loss:3.5627 train_time:362511ms step_avg:151.93ms step:2397/3200 train_loss:3.5728 train_time:362663ms step_avg:151.93ms step:2398/3200 train_loss:3.5390 train_time:362814ms step_avg:151.93ms step:2399/3200 train_loss:3.4973 train_time:362964ms step_avg:151.93ms step:2400/3200 train_loss:3.3733 train_time:363112ms step_avg:151.93ms step:2401/3200 train_loss:3.3709 train_time:363263ms step_avg:151.93ms step:2402/3200 train_loss:3.4777 train_time:363412ms step_avg:151.93ms step:2403/3200 train_loss:3.3131 train_time:363563ms step_avg:151.93ms step:2404/3200 train_loss:3.4422 train_time:363714ms step_avg:151.93ms step:2405/3200 train_loss:3.6623 train_time:363865ms step_avg:151.93ms step:2406/3200 
train_loss:3.3799 train_time:364015ms step_avg:151.93ms step:2407/3200 train_loss:3.5325 train_time:364164ms step_avg:151.92ms step:2408/3200 train_loss:3.3913 train_time:364314ms step_avg:151.92ms step:2409/3200 train_loss:3.3252 train_time:364464ms step_avg:151.92ms step:2410/3200 train_loss:3.4623 train_time:364616ms step_avg:151.92ms step:2411/3200 train_loss:3.2457 train_time:364767ms step_avg:151.92ms step:2412/3200 train_loss:3.6810 train_time:364919ms step_avg:151.92ms step:2413/3200 train_loss:3.3729 train_time:365069ms step_avg:151.92ms step:2414/3200 train_loss:3.4540 train_time:365221ms step_avg:151.92ms step:2415/3200 train_loss:3.3659 train_time:365370ms step_avg:151.92ms step:2416/3200 train_loss:3.4372 train_time:365522ms step_avg:151.92ms step:2417/3200 train_loss:3.2545 train_time:365672ms step_avg:151.92ms step:2418/3200 train_loss:3.1819 train_time:365823ms step_avg:151.92ms step:2419/3200 train_loss:3.4816 train_time:365973ms step_avg:151.92ms step:2420/3200 train_loss:3.3583 train_time:366123ms step_avg:151.92ms step:2421/3200 train_loss:3.3886 train_time:366272ms step_avg:151.92ms step:2422/3200 train_loss:3.4973 train_time:366424ms step_avg:151.92ms step:2423/3200 train_loss:3.5359 train_time:366574ms step_avg:151.92ms step:2424/3200 train_loss:3.3552 train_time:366724ms step_avg:151.92ms step:2425/3200 train_loss:3.4534 train_time:366875ms step_avg:151.91ms step:2426/3200 train_loss:3.4446 train_time:367025ms step_avg:151.91ms step:2427/3200 train_loss:3.3705 train_time:367174ms step_avg:151.91ms step:2428/3200 train_loss:3.3159 train_time:367325ms step_avg:151.91ms step:2429/3200 train_loss:3.4520 train_time:367475ms step_avg:151.91ms step:2430/3200 train_loss:3.3491 train_time:367625ms step_avg:151.91ms step:2431/3200 train_loss:3.3994 train_time:367777ms step_avg:151.91ms step:2432/3200 train_loss:3.4612 train_time:367926ms step_avg:151.91ms step:2433/3200 train_loss:3.4263 train_time:368076ms step_avg:151.91ms step:2434/3200 
train_loss:3.2976 train_time:368225ms step_avg:151.91ms step:2435/3200 train_loss:3.2618 train_time:368377ms step_avg:151.91ms step:2436/3200 train_loss:3.4291 train_time:368526ms step_avg:151.91ms step:2437/3200 train_loss:3.2889 train_time:368677ms step_avg:151.91ms step:2438/3200 train_loss:3.3575 train_time:368826ms step_avg:151.91ms step:2439/3200 train_loss:3.4570 train_time:368978ms step_avg:151.91ms step:2440/3200 train_loss:3.3741 train_time:369128ms step_avg:151.90ms step:2441/3200 train_loss:3.4564 train_time:369279ms step_avg:151.90ms step:2442/3200 train_loss:3.3438 train_time:369428ms step_avg:151.90ms step:2443/3200 train_loss:3.4026 train_time:369581ms step_avg:151.90ms step:2444/3200 train_loss:3.2895 train_time:369729ms step_avg:151.90ms step:2445/3200 train_loss:3.2944 train_time:369882ms step_avg:151.90ms step:2446/3200 train_loss:3.4572 train_time:370029ms step_avg:151.90ms step:2447/3200 train_loss:3.3253 train_time:370181ms step_avg:151.90ms step:2448/3200 train_loss:3.3900 train_time:370329ms step_avg:151.90ms step:2449/3200 train_loss:3.5648 train_time:370483ms step_avg:151.90ms step:2450/3200 train_loss:3.3887 train_time:370631ms step_avg:151.90ms step:2451/3200 train_loss:3.4613 train_time:370784ms step_avg:151.90ms step:2452/3200 train_loss:3.3629 train_time:370932ms step_avg:151.90ms step:2453/3200 train_loss:3.4641 train_time:371083ms step_avg:151.90ms step:2454/3200 train_loss:3.3593 train_time:371231ms step_avg:151.90ms step:2455/3200 train_loss:3.4852 train_time:371383ms step_avg:151.89ms step:2456/3200 train_loss:3.4224 train_time:371531ms step_avg:151.89ms step:2457/3200 train_loss:3.3433 train_time:371841ms step_avg:151.96ms step:2458/3200 train_loss:3.2607 train_time:372000ms step_avg:151.96ms step:2459/3200 train_loss:3.3976 train_time:372148ms step_avg:151.96ms step:2460/3200 train_loss:3.9914 train_time:372297ms step_avg:151.96ms step:2461/3200 train_loss:3.4530 train_time:372445ms step_avg:151.96ms step:2462/3200 
train_loss:3.2717 train_time:372594ms step_avg:151.96ms step:2463/3200 train_loss:3.4712 train_time:372746ms step_avg:151.95ms step:2464/3200 train_loss:3.3817 train_time:372901ms step_avg:151.96ms step:2465/3200 train_loss:3.5845 train_time:373052ms step_avg:151.96ms step:2466/3200 train_loss:3.7403 train_time:373203ms step_avg:151.96ms step:2467/3200 train_loss:3.5013 train_time:373352ms step_avg:151.95ms step:2468/3200 train_loss:3.3727 train_time:373502ms step_avg:151.95ms step:2469/3200 train_loss:3.4889 train_time:373651ms step_avg:151.95ms step:2470/3200 train_loss:3.5003 train_time:373962ms step_avg:152.02ms step:2471/3200 train_loss:3.2991 train_time:374110ms step_avg:152.02ms step:2472/3200 train_loss:3.3945 train_time:374260ms step_avg:152.01ms step:2473/3200 train_loss:3.3938 train_time:374408ms step_avg:152.01ms step:2474/3200 train_loss:3.5345 train_time:374557ms step_avg:152.01ms step:2475/3200 train_loss:3.6627 train_time:374705ms step_avg:152.01ms step:2476/3200 train_loss:3.2527 train_time:374857ms step_avg:152.01ms step:2477/3200 train_loss:3.4675 train_time:375011ms step_avg:152.01ms step:2478/3200 train_loss:3.4327 train_time:375162ms step_avg:152.01ms step:2479/3200 train_loss:3.2691 train_time:375310ms step_avg:152.01ms step:2480/3200 train_loss:3.2643 train_time:375461ms step_avg:152.01ms step:2481/3200 train_loss:3.4104 train_time:375609ms step_avg:152.01ms step:2482/3200 train_loss:3.4184 train_time:375761ms step_avg:152.01ms step:2483/3200 train_loss:3.4337 train_time:375914ms step_avg:152.01ms step:2484/3200 train_loss:3.3960 train_time:376065ms step_avg:152.01ms step:2485/3200 train_loss:3.3953 train_time:376215ms step_avg:152.01ms step:2486/3200 train_loss:3.2869 train_time:376365ms step_avg:152.01ms step:2487/3200 train_loss:3.4887 train_time:376516ms step_avg:152.00ms step:2488/3200 train_loss:3.4400 train_time:376665ms step_avg:152.00ms step:2489/3200 train_loss:3.3396 train_time:376815ms step_avg:152.00ms step:2490/3200 
train_loss:3.4566 train_time:376967ms step_avg:152.00ms step:2491/3200 train_loss:3.5000 train_time:377120ms step_avg:152.00ms step:2492/3200 train_loss:3.5815 train_time:377270ms step_avg:152.00ms step:2493/3200 train_loss:3.4373 train_time:377422ms step_avg:152.00ms step:2494/3200 train_loss:3.3579 train_time:377571ms step_avg:152.00ms step:2495/3200 train_loss:3.4788 train_time:377723ms step_avg:152.00ms step:2496/3200 train_loss:3.4337 train_time:377872ms step_avg:152.00ms step:2497/3200 train_loss:3.3394 train_time:378022ms step_avg:152.00ms step:2498/3200 train_loss:3.4380 train_time:378172ms step_avg:152.00ms step:2499/3200 train_loss:3.4917 train_time:378323ms step_avg:152.00ms step:2500/3200 train_loss:3.5172 train_time:378473ms step_avg:152.00ms step:2500/3200 val_loss:3.3866 train_time:378520ms step_avg:152.02ms step:2501/3200 train_loss:3.4573 train_time:378632ms step_avg:152.00ms step:2502/3200 train_loss:3.4180 train_time:378783ms step_avg:152.00ms step:2503/3200 train_loss:3.4269 train_time:378932ms step_avg:152.00ms step:2504/3200 train_loss:3.2916 train_time:379080ms step_avg:152.00ms step:2505/3200 train_loss:3.4905 train_time:379229ms step_avg:152.00ms step:2506/3200 train_loss:3.4424 train_time:379377ms step_avg:151.99ms step:2507/3200 train_loss:3.3835 train_time:379528ms step_avg:151.99ms step:2508/3200 train_loss:3.3849 train_time:379681ms step_avg:151.99ms step:2509/3200 train_loss:3.3449 train_time:379834ms step_avg:151.99ms step:2510/3200 train_loss:3.5360 train_time:379985ms step_avg:151.99ms step:2511/3200 train_loss:3.3514 train_time:380134ms step_avg:151.99ms step:2512/3200 train_loss:3.3405 train_time:380285ms step_avg:151.99ms step:2513/3200 train_loss:3.4224 train_time:380433ms step_avg:151.99ms step:2514/3200 train_loss:3.4460 train_time:380584ms step_avg:151.99ms step:2515/3200 train_loss:3.3517 train_time:380736ms step_avg:151.99ms step:2516/3200 train_loss:3.4335 train_time:380889ms step_avg:151.99ms step:2517/3200 
train_loss:3.4216 train_time:381038ms step_avg:151.99ms step:2518/3200 train_loss:3.3063 train_time:381188ms step_avg:151.99ms step:2519/3200 train_loss:3.3366 train_time:381336ms step_avg:151.99ms step:2520/3200 train_loss:3.4523 train_time:381488ms step_avg:151.99ms step:2521/3200 train_loss:3.4395 train_time:381639ms step_avg:151.99ms step:2522/3200 train_loss:3.3310 train_time:381790ms step_avg:151.99ms step:2523/3200 train_loss:3.3070 train_time:381941ms step_avg:151.99ms step:2524/3200 train_loss:3.4025 train_time:382091ms step_avg:151.99ms step:2525/3200 train_loss:3.2592 train_time:382240ms step_avg:151.98ms step:2526/3200 train_loss:3.4718 train_time:382390ms step_avg:151.98ms step:2527/3200 train_loss:3.3751 train_time:382539ms step_avg:151.98ms step:2528/3200 train_loss:3.3874 train_time:382690ms step_avg:151.98ms step:2529/3200 train_loss:3.3759 train_time:382840ms step_avg:151.98ms step:2530/3200 train_loss:3.3864 train_time:382992ms step_avg:151.98ms step:2531/3200 train_loss:3.4267 train_time:383142ms step_avg:151.98ms step:2532/3200 train_loss:3.2513 train_time:383291ms step_avg:151.98ms step:2533/3200 train_loss:3.4142 train_time:383440ms step_avg:151.98ms step:2534/3200 train_loss:3.3054 train_time:383592ms step_avg:151.98ms step:2535/3200 train_loss:3.3425 train_time:383742ms step_avg:151.98ms step:2536/3200 train_loss:3.4035 train_time:383892ms step_avg:151.98ms step:2537/3200 train_loss:3.4049 train_time:384042ms step_avg:151.98ms step:2538/3200 train_loss:3.2396 train_time:384191ms step_avg:151.97ms step:2539/3200 train_loss:3.5440 train_time:384340ms step_avg:151.97ms step:2540/3200 train_loss:3.2284 train_time:384491ms step_avg:151.97ms step:2541/3200 train_loss:3.4124 train_time:384642ms step_avg:151.97ms step:2542/3200 train_loss:3.1737 train_time:384792ms step_avg:151.97ms step:2543/3200 train_loss:3.6136 train_time:384944ms step_avg:151.97ms step:2544/3200 train_loss:3.3814 train_time:385095ms step_avg:151.97ms step:2545/3200 
train_loss:3.5348 train_time:385246ms step_avg:151.97ms step:2546/3200 train_loss:3.3736 train_time:385395ms step_avg:151.97ms step:2547/3200 train_loss:3.3436 train_time:385548ms step_avg:151.97ms step:2548/3200 train_loss:3.3548 train_time:385697ms step_avg:151.97ms step:2549/3200 train_loss:3.5176 train_time:385850ms step_avg:151.97ms step:2550/3200 train_loss:3.3749 train_time:385999ms step_avg:151.97ms step:2551/3200 train_loss:3.3722 train_time:386151ms step_avg:151.97ms step:2552/3200 train_loss:3.4052 train_time:386300ms step_avg:151.97ms step:2553/3200 train_loss:3.4288 train_time:386451ms step_avg:151.97ms step:2554/3200 train_loss:3.3338 train_time:386600ms step_avg:151.97ms step:2555/3200 train_loss:3.4400 train_time:386751ms step_avg:151.97ms step:2556/3200 train_loss:3.4933 train_time:386901ms step_avg:151.96ms step:2557/3200 train_loss:3.4846 train_time:387052ms step_avg:151.96ms step:2558/3200 train_loss:3.3274 train_time:387203ms step_avg:151.96ms step:2559/3200 train_loss:3.3296 train_time:387352ms step_avg:151.96ms step:2560/3200 train_loss:3.3359 train_time:387502ms step_avg:151.96ms step:2561/3200 train_loss:3.4536 train_time:387654ms step_avg:151.96ms step:2562/3200 train_loss:3.4932 train_time:387805ms step_avg:151.96ms step:2563/3200 train_loss:3.3737 train_time:387955ms step_avg:151.96ms step:2564/3200 train_loss:3.4004 train_time:388107ms step_avg:151.96ms step:2565/3200 train_loss:3.3226 train_time:388256ms step_avg:151.96ms step:2566/3200 train_loss:3.3340 train_time:388408ms step_avg:151.96ms step:2567/3200 train_loss:3.3297 train_time:388557ms step_avg:151.96ms step:2568/3200 train_loss:3.3775 train_time:388708ms step_avg:151.96ms step:2569/3200 train_loss:3.5205 train_time:388857ms step_avg:151.96ms step:2570/3200 train_loss:3.4256 train_time:389009ms step_avg:151.96ms step:2571/3200 train_loss:3.5041 train_time:389158ms step_avg:151.96ms step:2572/3200 train_loss:3.2609 train_time:389311ms step_avg:151.96ms step:2573/3200 
train_loss:3.3640 train_time:389459ms step_avg:151.95ms step:2574/3200 train_loss:3.0298 train_time:389612ms step_avg:151.95ms step:2575/3200 train_loss:3.2755 train_time:389761ms step_avg:151.95ms step:2576/3200 train_loss:3.2141 train_time:389913ms step_avg:151.95ms step:2577/3200 train_loss:3.3295 train_time:390062ms step_avg:151.95ms step:2578/3200 train_loss:3.3751 train_time:390213ms step_avg:151.95ms step:2579/3200 train_loss:3.2938 train_time:390363ms step_avg:151.95ms step:2580/3200 train_loss:3.3503 train_time:390514ms step_avg:151.95ms step:2581/3200 train_loss:3.2948 train_time:390663ms step_avg:151.95ms step:2582/3200 train_loss:3.3995 train_time:390814ms step_avg:151.95ms step:2583/3200 train_loss:3.2787 train_time:390964ms step_avg:151.95ms step:2584/3200 train_loss:3.4694 train_time:391114ms step_avg:151.95ms step:2585/3200 train_loss:3.3800 train_time:391265ms step_avg:151.95ms step:2586/3200 train_loss:3.3955 train_time:391414ms step_avg:151.95ms step:2587/3200 train_loss:3.5207 train_time:391566ms step_avg:151.95ms step:2588/3200 train_loss:3.4045 train_time:391715ms step_avg:151.95ms step:2589/3200 train_loss:3.2719 train_time:391866ms step_avg:151.95ms step:2590/3200 train_loss:3.4320 train_time:392016ms step_avg:151.94ms step:2591/3200 train_loss:3.3369 train_time:392169ms step_avg:151.94ms step:2592/3200 train_loss:3.5528 train_time:392317ms step_avg:151.94ms step:2593/3200 train_loss:3.4164 train_time:392470ms step_avg:151.94ms step:2594/3200 train_loss:3.2323 train_time:392619ms step_avg:151.94ms step:2595/3200 train_loss:3.3063 train_time:392770ms step_avg:151.94ms step:2596/3200 train_loss:3.7302 train_time:392919ms step_avg:151.94ms step:2597/3200 train_loss:3.3945 train_time:393070ms step_avg:151.94ms step:2598/3200 train_loss:3.3874 train_time:393219ms step_avg:151.94ms step:2599/3200 train_loss:3.2413 train_time:393371ms step_avg:151.94ms step:2600/3200 train_loss:3.4853 train_time:393520ms step_avg:151.94ms step:2601/3200 
train_loss:3.6436 train_time:393671ms step_avg:151.94ms step:2602/3200 train_loss:3.2326 train_time:393820ms step_avg:151.94ms step:2603/3200 train_loss:3.3707 train_time:393971ms step_avg:151.94ms step:2604/3200 train_loss:3.2134 train_time:394121ms step_avg:151.94ms step:2605/3200 train_loss:3.5052 train_time:394272ms step_avg:151.94ms step:2606/3200 train_loss:3.3733 train_time:394420ms step_avg:151.93ms step:2607/3200 train_loss:3.2657 train_time:394571ms step_avg:151.93ms step:2608/3200 train_loss:3.2218 train_time:394722ms step_avg:151.93ms step:2609/3200 train_loss:3.3402 train_time:394873ms step_avg:151.93ms step:2610/3200 train_loss:3.5168 train_time:395023ms step_avg:151.93ms step:2611/3200 train_loss:3.3839 train_time:395174ms step_avg:151.93ms step:2612/3200 train_loss:3.2293 train_time:395326ms step_avg:151.93ms step:2613/3200 train_loss:3.3165 train_time:395476ms step_avg:151.93ms step:2614/3200 train_loss:3.4270 train_time:395628ms step_avg:151.93ms step:2615/3200 train_loss:3.3618 train_time:395778ms step_avg:151.93ms step:2616/3200 train_loss:3.3603 train_time:395930ms step_avg:151.93ms step:2617/3200 train_loss:3.3998 train_time:396079ms step_avg:151.93ms step:2618/3200 train_loss:3.4282 train_time:396230ms step_avg:151.93ms step:2619/3200 train_loss:3.2810 train_time:396379ms step_avg:151.93ms step:2620/3200 train_loss:3.4535 train_time:396530ms step_avg:151.93ms step:2621/3200 train_loss:3.4184 train_time:396680ms step_avg:151.93ms step:2622/3200 train_loss:3.5460 train_time:396831ms step_avg:151.93ms step:2623/3200 train_loss:3.4581 train_time:396981ms step_avg:151.93ms step:2624/3200 train_loss:3.3794 train_time:397132ms step_avg:151.92ms step:2625/3200 train_loss:3.3330 train_time:397281ms step_avg:151.92ms step:2625/3200 val_loss:3.3631 train_time:397328ms step_avg:151.94ms step:2626/3200 train_loss:3.3585 train_time:397440ms step_avg:151.93ms step:2627/3200 train_loss:3.4238 train_time:397592ms step_avg:151.93ms step:2628/3200 
train_loss:3.2321 train_time:397741ms step_avg:151.93ms step:2629/3200 train_loss:3.5097 train_time:397889ms step_avg:151.92ms step:2630/3200 train_loss:3.3874 train_time:398039ms step_avg:151.92ms step:2631/3200 train_loss:3.4306 train_time:398187ms step_avg:151.92ms step:2632/3200 train_loss:3.6683 train_time:398339ms step_avg:151.92ms step:2633/3200 train_loss:3.4076 train_time:398491ms step_avg:151.92ms step:2634/3200 train_loss:3.3317 train_time:398643ms step_avg:151.92ms step:2635/3200 train_loss:3.3012 train_time:398792ms step_avg:151.92ms step:2636/3200 train_loss:3.3452 train_time:398941ms step_avg:151.92ms step:2637/3200 train_loss:3.1292 train_time:399089ms step_avg:151.92ms step:2638/3200 train_loss:3.4459 train_time:399240ms step_avg:151.92ms step:2639/3200 train_loss:3.4118 train_time:399389ms step_avg:151.92ms step:2640/3200 train_loss:3.3076 train_time:399544ms step_avg:151.92ms step:2641/3200 train_loss:3.3902 train_time:399693ms step_avg:151.92ms step:2642/3200 train_loss:3.4216 train_time:399844ms step_avg:151.92ms step:2643/3200 train_loss:3.2174 train_time:399992ms step_avg:151.91ms step:2644/3200 train_loss:3.3360 train_time:400143ms step_avg:151.91ms step:2645/3200 train_loss:3.4103 train_time:400292ms step_avg:151.91ms step:2646/3200 train_loss:3.3724 train_time:400601ms step_avg:151.97ms step:2647/3200 train_loss:3.2638 train_time:400758ms step_avg:151.97ms step:2648/3200 train_loss:3.4846 train_time:400906ms step_avg:151.97ms step:2649/3200 train_loss:3.7373 train_time:401054ms step_avg:151.97ms step:2650/3200 train_loss:3.3824 train_time:401203ms step_avg:151.97ms step:2651/3200 train_loss:3.3415 train_time:401351ms step_avg:151.97ms step:2652/3200 train_loss:3.4768 train_time:401503ms step_avg:151.97ms step:2653/3200 train_loss:3.3140 train_time:401655ms step_avg:151.97ms step:2654/3200 train_loss:3.3035 train_time:401807ms step_avg:151.97ms step:2655/3200 train_loss:3.3737 train_time:401958ms step_avg:151.97ms step:2656/3200 
train_loss:3.2926 train_time:402107ms step_avg:151.97ms step:2657/3200 train_loss:3.3201 train_time:402256ms step_avg:151.97ms step:2658/3200 train_loss:3.2922 train_time:402406ms step_avg:151.97ms step:2659/3200 train_loss:3.3826 train_time:402556ms step_avg:151.97ms step:2660/3200 train_loss:3.5172 train_time:402879ms step_avg:152.03ms step:2661/3200 train_loss:3.3124 train_time:403028ms step_avg:152.03ms step:2662/3200 train_loss:3.4652 train_time:403180ms step_avg:152.03ms step:2663/3200 train_loss:3.3342 train_time:403328ms step_avg:152.03ms step:2664/3200 train_loss:3.3296 train_time:403477ms step_avg:152.03ms step:2665/3200 train_loss:3.2602 train_time:403625ms step_avg:152.02ms step:2666/3200 train_loss:3.3042 train_time:403775ms step_avg:152.02ms step:2667/3200 train_loss:3.3519 train_time:403930ms step_avg:152.02ms step:2668/3200 train_loss:3.3853 train_time:404082ms step_avg:152.03ms step:2669/3200 train_loss:3.2939 train_time:404232ms step_avg:152.02ms step:2670/3200 train_loss:3.3599 train_time:404381ms step_avg:152.02ms step:2671/3200 train_loss:3.2519 train_time:404529ms step_avg:152.02ms step:2672/3200 train_loss:3.3217 train_time:404680ms step_avg:152.02ms step:2673/3200 train_loss:3.2961 train_time:404830ms step_avg:152.02ms step:2674/3200 train_loss:3.3707 train_time:404983ms step_avg:152.02ms step:2675/3200 train_loss:3.3921 train_time:405135ms step_avg:152.02ms step:2676/3200 train_loss:3.3571 train_time:405284ms step_avg:152.02ms step:2677/3200 train_loss:3.3510 train_time:405434ms step_avg:152.02ms step:2678/3200 train_loss:3.3785 train_time:405583ms step_avg:152.02ms step:2679/3200 train_loss:3.4226 train_time:405734ms step_avg:152.02ms step:2680/3200 train_loss:3.3279 train_time:405884ms step_avg:152.02ms step:2681/3200 train_loss:3.2561 train_time:406035ms step_avg:152.02ms step:2682/3200 train_loss:3.2989 train_time:406185ms step_avg:152.02ms step:2683/3200 train_loss:3.7748 train_time:406337ms step_avg:152.02ms step:2684/3200 
train_loss:3.3577 train_time:406485ms step_avg:152.01ms step:2685/3200 train_loss:3.3879 train_time:406637ms step_avg:152.01ms step:2686/3200 train_loss:3.4326 train_time:406787ms step_avg:152.01ms step:2687/3200 train_loss:3.3522 train_time:406938ms step_avg:152.01ms step:2688/3200 train_loss:3.4349 train_time:407088ms step_avg:152.01ms step:2689/3200 train_loss:3.3654 train_time:407241ms step_avg:152.01ms step:2690/3200 train_loss:3.3503 train_time:407389ms step_avg:152.01ms step:2691/3200 train_loss:3.3787 train_time:407540ms step_avg:152.01ms step:2692/3200 train_loss:3.4472 train_time:407689ms step_avg:152.01ms step:2693/3200 train_loss:3.2454 train_time:407841ms step_avg:152.01ms step:2694/3200 train_loss:3.6292 train_time:407990ms step_avg:152.01ms step:2695/3200 train_loss:3.4300 train_time:408142ms step_avg:152.01ms step:2696/3200 train_loss:3.2171 train_time:408291ms step_avg:152.01ms step:2697/3200 train_loss:3.4133 train_time:408442ms step_avg:152.01ms step:2698/3200 train_loss:3.3769 train_time:408592ms step_avg:152.01ms step:2699/3200 train_loss:3.3301 train_time:408743ms step_avg:152.01ms step:2700/3200 train_loss:3.4306 train_time:408891ms step_avg:152.00ms step:2701/3200 train_loss:3.4062 train_time:409043ms step_avg:152.00ms step:2702/3200 train_loss:3.3072 train_time:409191ms step_avg:152.00ms step:2703/3200 train_loss:3.3211 train_time:409343ms step_avg:152.00ms step:2704/3200 train_loss:3.3414 train_time:409492ms step_avg:152.00ms step:2705/3200 train_loss:3.3084 train_time:409644ms step_avg:152.00ms step:2706/3200 train_loss:3.4757 train_time:409793ms step_avg:152.00ms step:2707/3200 train_loss:3.4403 train_time:409944ms step_avg:152.00ms step:2708/3200 train_loss:3.3457 train_time:410094ms step_avg:152.00ms step:2709/3200 train_loss:3.3433 train_time:410244ms step_avg:152.00ms step:2710/3200 train_loss:3.4485 train_time:410393ms step_avg:152.00ms step:2711/3200 train_loss:3.3222 train_time:410545ms step_avg:152.00ms step:2712/3200 
train_loss:3.4403 train_time:410695ms step_avg:152.00ms step:2713/3200 train_loss:3.1767 train_time:410845ms step_avg:152.00ms step:2714/3200 train_loss:3.3727 train_time:410995ms step_avg:152.00ms step:2715/3200 train_loss:3.2604 train_time:411146ms step_avg:151.99ms step:2716/3200 train_loss:3.2800 train_time:411297ms step_avg:151.99ms step:2717/3200 train_loss:3.4647 train_time:411446ms step_avg:151.99ms step:2718/3200 train_loss:3.3654 train_time:411597ms step_avg:151.99ms step:2719/3200 train_loss:3.5968 train_time:411746ms step_avg:151.99ms step:2720/3200 train_loss:3.3380 train_time:411898ms step_avg:151.99ms step:2721/3200 train_loss:3.3324 train_time:412047ms step_avg:151.99ms step:2722/3200 train_loss:3.5667 train_time:412199ms step_avg:151.99ms step:2723/3200 train_loss:3.3286 train_time:412349ms step_avg:151.99ms step:2724/3200 train_loss:3.5011 train_time:412501ms step_avg:151.99ms step:2725/3200 train_loss:3.3804 train_time:412652ms step_avg:151.99ms step:2726/3200 train_loss:3.3486 train_time:412803ms step_avg:151.99ms step:2727/3200 train_loss:3.3530 train_time:412952ms step_avg:151.99ms step:2728/3200 train_loss:3.6887 train_time:413103ms step_avg:151.99ms step:2729/3200 train_loss:3.4182 train_time:413253ms step_avg:151.99ms step:2730/3200 train_loss:3.2793 train_time:413404ms step_avg:151.99ms step:2731/3200 train_loss:3.3956 train_time:413553ms step_avg:151.99ms step:2732/3200 train_loss:3.3013 train_time:413704ms step_avg:151.99ms step:2733/3200 train_loss:3.1886 train_time:413854ms step_avg:151.98ms step:2734/3200 train_loss:3.2997 train_time:414005ms step_avg:151.98ms step:2735/3200 train_loss:3.3752 train_time:414155ms step_avg:151.98ms step:2736/3200 train_loss:3.2641 train_time:414306ms step_avg:151.98ms step:2737/3200 train_loss:3.6694 train_time:414457ms step_avg:151.98ms step:2738/3200 train_loss:3.4109 train_time:414607ms step_avg:151.98ms step:2739/3200 train_loss:3.6034 train_time:414759ms step_avg:151.98ms step:2740/3200 
train_loss:3.3559 train_time:414908ms step_avg:151.98ms step:2741/3200 train_loss:3.3563 train_time:415060ms step_avg:151.98ms step:2742/3200 train_loss:3.2945 train_time:415210ms step_avg:151.98ms step:2743/3200 train_loss:3.3645 train_time:415362ms step_avg:151.98ms step:2744/3200 train_loss:3.3721 train_time:415511ms step_avg:151.98ms step:2745/3200 train_loss:3.4688 train_time:415663ms step_avg:151.98ms step:2746/3200 train_loss:3.2421 train_time:415813ms step_avg:151.98ms step:2747/3200 train_loss:3.3351 train_time:415963ms step_avg:151.98ms step:2748/3200 train_loss:3.3765 train_time:416114ms step_avg:151.98ms step:2749/3200 train_loss:3.4842 train_time:416264ms step_avg:151.98ms step:2750/3200 train_loss:3.3249 train_time:416413ms step_avg:151.98ms step:2750/3200 val_loss:3.3420 train_time:416460ms step_avg:151.99ms step:2751/3200 train_loss:3.4052 train_time:416574ms step_avg:151.98ms step:2752/3200 train_loss:3.4566 train_time:416724ms step_avg:151.98ms step:2753/3200 train_loss:3.3646 train_time:416874ms step_avg:151.98ms step:2754/3200 train_loss:3.2905 train_time:417021ms step_avg:151.98ms step:2755/3200 train_loss:3.2980 train_time:417171ms step_avg:151.98ms step:2756/3200 train_loss:3.3761 train_time:417319ms step_avg:151.97ms step:2757/3200 train_loss:3.3102 train_time:417470ms step_avg:151.97ms step:2758/3200 train_loss:3.1998 train_time:417623ms step_avg:151.97ms step:2759/3200 train_loss:3.5836 train_time:417775ms step_avg:151.97ms step:2760/3200 train_loss:3.3949 train_time:417926ms step_avg:151.97ms step:2761/3200 train_loss:3.3626 train_time:418075ms step_avg:151.97ms step:2762/3200 train_loss:3.3270 train_time:418223ms step_avg:151.97ms step:2763/3200 train_loss:3.2373 train_time:418373ms step_avg:151.97ms step:2764/3200 train_loss:3.4049 train_time:418523ms step_avg:151.97ms step:2765/3200 train_loss:3.3349 train_time:418676ms step_avg:151.97ms step:2766/3200 train_loss:3.2244 train_time:418826ms step_avg:151.97ms step:2767/3200 
train_loss:3.3216 train_time:418977ms step_avg:151.97ms step:2768/3200 train_loss:3.3998 train_time:419126ms step_avg:151.97ms step:2769/3200 train_loss:3.2762 train_time:419276ms step_avg:151.97ms step:2770/3200 train_loss:3.3556 train_time:419424ms step_avg:151.97ms step:2771/3200 train_loss:3.3357 train_time:419575ms step_avg:151.96ms step:2772/3200 train_loss:3.7696 train_time:419725ms step_avg:151.96ms step:2773/3200 train_loss:3.2439 train_time:419877ms step_avg:151.96ms step:2774/3200 train_loss:3.3807 train_time:420027ms step_avg:151.96ms step:2775/3200 train_loss:3.4420 train_time:420177ms step_avg:151.96ms step:2776/3200 train_loss:3.4005 train_time:420329ms step_avg:151.96ms step:2777/3200 train_loss:3.4736 train_time:420478ms step_avg:151.96ms step:2778/3200 train_loss:3.4828 train_time:420629ms step_avg:151.96ms step:2779/3200 train_loss:3.3565 train_time:420779ms step_avg:151.96ms step:2780/3200 train_loss:3.2145 train_time:420929ms step_avg:151.96ms step:2781/3200 train_loss:3.3612 train_time:421079ms step_avg:151.96ms step:2782/3200 train_loss:3.3874 train_time:421230ms step_avg:151.96ms step:2783/3200 train_loss:3.2563 train_time:421379ms step_avg:151.96ms step:2784/3200 train_loss:3.3488 train_time:421531ms step_avg:151.96ms step:2785/3200 train_loss:3.4127 train_time:421679ms step_avg:151.96ms step:2786/3200 train_loss:3.2981 train_time:421832ms step_avg:151.96ms step:2787/3200 train_loss:3.4008 train_time:421982ms step_avg:151.96ms step:2788/3200 train_loss:3.3722 train_time:422133ms step_avg:151.96ms step:2789/3200 train_loss:3.2999 train_time:422282ms step_avg:151.95ms step:2790/3200 train_loss:3.3931 train_time:422433ms step_avg:151.95ms step:2791/3200 train_loss:3.3252 train_time:422582ms step_avg:151.95ms step:2792/3200 train_loss:3.2218 train_time:422734ms step_avg:151.95ms step:2793/3200 train_loss:3.3178 train_time:422884ms step_avg:151.95ms step:2794/3200 train_loss:3.3645 train_time:423034ms step_avg:151.95ms step:2795/3200 
train_loss:3.2801 train_time:423184ms step_avg:151.95ms step:2796/3200 train_loss:3.3234 train_time:423334ms step_avg:151.95ms step:2797/3200 train_loss:3.2302 train_time:423482ms step_avg:151.95ms step:2798/3200 train_loss:3.3431 train_time:423633ms step_avg:151.95ms step:2799/3200 train_loss:3.2993 train_time:423782ms step_avg:151.95ms step:2800/3200 train_loss:3.4613 train_time:423933ms step_avg:151.95ms step:2801/3200 train_loss:3.4153 train_time:424082ms step_avg:151.95ms step:2802/3200 train_loss:3.3903 train_time:424233ms step_avg:151.95ms step:2803/3200 train_loss:3.3307 train_time:424382ms step_avg:151.94ms step:2804/3200 train_loss:3.5003 train_time:424534ms step_avg:151.94ms step:2805/3200 train_loss:3.4814 train_time:424682ms step_avg:151.94ms step:2806/3200 train_loss:3.2035 train_time:424832ms step_avg:151.94ms step:2807/3200 train_loss:3.6006 train_time:424981ms step_avg:151.94ms step:2808/3200 train_loss:3.3422 train_time:425133ms step_avg:151.94ms step:2809/3200 train_loss:3.2851 train_time:425282ms step_avg:151.94ms step:2810/3200 train_loss:3.2942 train_time:425434ms step_avg:151.94ms step:2811/3200 train_loss:3.4611 train_time:425583ms step_avg:151.94ms step:2812/3200 train_loss:3.4415 train_time:425734ms step_avg:151.94ms step:2813/3200 train_loss:3.1984 train_time:425884ms step_avg:151.94ms step:2814/3200 train_loss:3.4205 train_time:426034ms step_avg:151.94ms step:2815/3200 train_loss:3.4919 train_time:426185ms step_avg:151.94ms step:2816/3200 train_loss:3.2977 train_time:426336ms step_avg:151.94ms step:2817/3200 train_loss:3.0099 train_time:426486ms step_avg:151.94ms step:2818/3200 train_loss:3.3181 train_time:426637ms step_avg:151.94ms step:2819/3200 train_loss:3.2867 train_time:426788ms step_avg:151.94ms step:2820/3200 train_loss:3.4861 train_time:426937ms step_avg:151.94ms step:2821/3200 train_loss:3.3345 train_time:427088ms step_avg:151.93ms step:2822/3200 train_loss:3.3978 train_time:427238ms step_avg:151.93ms step:2823/3200 
train_loss:3.3400 train_time:427389ms step_avg:151.93ms step:2824/3200 train_loss:3.3089 train_time:427539ms step_avg:151.93ms step:2825/3200 train_loss:3.2021 train_time:427690ms step_avg:151.93ms step:2826/3200 train_loss:3.4679 train_time:427840ms step_avg:151.93ms step:2827/3200 train_loss:3.3546 train_time:427992ms step_avg:151.93ms step:2828/3200 train_loss:3.2481 train_time:428141ms step_avg:151.93ms step:2829/3200 train_loss:3.3797 train_time:428293ms step_avg:151.93ms step:2830/3200 train_loss:3.3724 train_time:428443ms step_avg:151.93ms step:2831/3200 train_loss:3.3111 train_time:428593ms step_avg:151.93ms step:2832/3200 train_loss:3.4551 train_time:428743ms step_avg:151.93ms step:2833/3200 train_loss:3.3738 train_time:428894ms step_avg:151.93ms step:2834/3200 train_loss:3.3579 train_time:429043ms step_avg:151.93ms step:2835/3200 train_loss:3.1711 train_time:429352ms step_avg:151.98ms step:2836/3200 train_loss:3.3944 train_time:429510ms step_avg:151.98ms step:2837/3200 train_loss:3.3281 train_time:429657ms step_avg:151.98ms step:2838/3200 train_loss:3.6181 train_time:429805ms step_avg:151.98ms step:2839/3200 train_loss:3.2822 train_time:429954ms step_avg:151.98ms step:2840/3200 train_loss:3.2908 train_time:430102ms step_avg:151.98ms step:2841/3200 train_loss:3.3456 train_time:430253ms step_avg:151.98ms step:2842/3200 train_loss:3.2780 train_time:430407ms step_avg:151.98ms step:2843/3200 train_loss:3.2786 train_time:430558ms step_avg:151.98ms step:2844/3200 train_loss:3.4513 train_time:430709ms step_avg:151.98ms step:2845/3200 train_loss:3.3256 train_time:430858ms step_avg:151.98ms step:2846/3200 train_loss:3.3625 train_time:431007ms step_avg:151.98ms step:2847/3200 train_loss:3.3233 train_time:431157ms step_avg:151.98ms step:2848/3200 train_loss:3.5875 train_time:431309ms step_avg:151.98ms step:2849/3200 train_loss:3.2510 train_time:431461ms step_avg:151.98ms step:2850/3200 train_loss:3.2869 train_time:431780ms step_avg:152.04ms step:2851/3200 
train_loss:3.3867 train_time:431929ms step_avg:152.03ms step:2852/3200 train_loss:3.3610 train_time:432079ms step_avg:152.03ms step:2853/3200 train_loss:3.3212 train_time:432228ms step_avg:152.03ms step:2854/3200 train_loss:3.3926 train_time:432376ms step_avg:152.03ms step:2855/3200 train_loss:3.2166 train_time:432526ms step_avg:152.03ms step:2856/3200 train_loss:3.2429 train_time:432678ms step_avg:152.03ms step:2857/3200 train_loss:3.3377 train_time:432832ms step_avg:152.03ms step:2858/3200 train_loss:3.3355 train_time:432981ms step_avg:152.03ms step:2859/3200 train_loss:3.2301 train_time:433132ms step_avg:152.03ms step:2860/3200 train_loss:3.3198 train_time:433280ms step_avg:152.03ms step:2861/3200 train_loss:3.2848 train_time:433429ms step_avg:152.03ms step:2862/3200 train_loss:3.3184 train_time:433578ms step_avg:152.03ms step:2863/3200 train_loss:3.3659 train_time:433731ms step_avg:152.03ms step:2864/3200 train_loss:3.6231 train_time:433882ms step_avg:152.03ms step:2865/3200 train_loss:3.4288 train_time:434034ms step_avg:152.03ms step:2866/3200 train_loss:3.3250 train_time:434183ms step_avg:152.02ms step:2867/3200 train_loss:3.2058 train_time:434333ms step_avg:152.02ms step:2868/3200 train_loss:3.4128 train_time:434481ms step_avg:152.02ms step:2869/3200 train_loss:3.3715 train_time:434632ms step_avg:152.02ms step:2870/3200 train_loss:3.3269 train_time:434782ms step_avg:152.02ms step:2871/3200 train_loss:3.4624 train_time:434934ms step_avg:152.02ms step:2872/3200 train_loss:3.2260 train_time:435084ms step_avg:152.02ms step:2873/3200 train_loss:3.3046 train_time:435235ms step_avg:152.02ms step:2874/3200 train_loss:3.1757 train_time:435384ms step_avg:152.02ms step:2875/3200 train_loss:3.3240 train_time:435534ms step_avg:152.02ms step:2875/3200 val_loss:3.3221 train_time:435580ms step_avg:152.03ms step:2876/3200 train_loss:3.2413 train_time:435690ms step_avg:152.02ms step:2877/3200 train_loss:3.2320 train_time:435841ms step_avg:152.02ms step:2878/3200 
train_loss:3.3130 train_time:435993ms step_avg:152.02ms step:2879/3200 train_loss:3.4338 train_time:436140ms step_avg:152.02ms step:2880/3200 train_loss:3.3811 train_time:436289ms step_avg:152.02ms step:2881/3200 train_loss:3.3312 train_time:436435ms step_avg:152.02ms step:2882/3200 train_loss:3.3197 train_time:436588ms step_avg:152.02ms step:2883/3200 train_loss:3.4336 train_time:436742ms step_avg:152.02ms step:2884/3200 train_loss:3.2065 train_time:436894ms step_avg:152.02ms step:2885/3200 train_loss:3.2328 train_time:437042ms step_avg:152.01ms step:2886/3200 train_loss:3.2839 train_time:437194ms step_avg:152.01ms step:2887/3200 train_loss:3.2828 train_time:437342ms step_avg:152.01ms step:2888/3200 train_loss:3.2859 train_time:437492ms step_avg:152.01ms step:2889/3200 train_loss:3.3178 train_time:437641ms step_avg:152.01ms step:2890/3200 train_loss:3.5061 train_time:437794ms step_avg:152.01ms step:2891/3200 train_loss:3.3384 train_time:437943ms step_avg:152.01ms step:2892/3200 train_loss:3.1772 train_time:438095ms step_avg:152.01ms step:2893/3200 train_loss:3.1078 train_time:438243ms step_avg:152.01ms step:2894/3200 train_loss:3.2540 train_time:438396ms step_avg:152.01ms step:2895/3200 train_loss:3.1361 train_time:438544ms step_avg:152.01ms step:2896/3200 train_loss:3.3136 train_time:438696ms step_avg:152.01ms step:2897/3200 train_loss:3.4441 train_time:438845ms step_avg:152.01ms step:2898/3200 train_loss:3.2704 train_time:438997ms step_avg:152.01ms step:2899/3200 train_loss:3.3639 train_time:439147ms step_avg:152.01ms step:2900/3200 train_loss:3.2430 train_time:439299ms step_avg:152.01ms step:2901/3200 train_loss:3.4301 train_time:439448ms step_avg:152.01ms step:2902/3200 train_loss:3.4222 train_time:439599ms step_avg:152.01ms step:2903/3200 train_loss:3.4444 train_time:439750ms step_avg:152.00ms step:2904/3200 train_loss:3.1766 train_time:439899ms step_avg:152.00ms step:2905/3200 train_loss:3.3233 train_time:440050ms step_avg:152.00ms step:2906/3200 
train_loss:3.2989 train_time:440201ms step_avg:152.00ms step:2907/3200 train_loss:3.3619 train_time:440352ms step_avg:152.00ms step:2908/3200 train_loss:3.3036 train_time:440502ms step_avg:152.00ms step:2909/3200 train_loss:3.2796 train_time:440652ms step_avg:152.00ms step:2910/3200 train_loss:3.6182 train_time:440802ms step_avg:152.00ms step:2911/3200 train_loss:3.3197 train_time:440952ms step_avg:152.00ms step:2912/3200 train_loss:3.2341 train_time:441102ms step_avg:152.00ms step:2913/3200 train_loss:3.2210 train_time:441253ms step_avg:152.00ms step:2914/3200 train_loss:3.6946 train_time:441403ms step_avg:152.00ms step:2915/3200 train_loss:3.2831 train_time:441555ms step_avg:152.00ms step:2916/3200 train_loss:3.2421 train_time:441704ms step_avg:152.00ms step:2917/3200 train_loss:3.2273 train_time:441856ms step_avg:152.00ms step:2918/3200 train_loss:3.5097 train_time:442004ms step_avg:152.00ms step:2919/3200 train_loss:3.0075 train_time:442155ms step_avg:152.00ms step:2920/3200 train_loss:3.2071 train_time:442305ms step_avg:151.99ms step:2921/3200 train_loss:3.2294 train_time:442456ms step_avg:151.99ms step:2922/3200 train_loss:3.3273 train_time:442605ms step_avg:151.99ms step:2923/3200 train_loss:3.3660 train_time:442757ms step_avg:151.99ms step:2924/3200 train_loss:3.4033 train_time:442905ms step_avg:151.99ms step:2925/3200 train_loss:3.4168 train_time:443056ms step_avg:151.99ms step:2926/3200 train_loss:3.2983 train_time:443205ms step_avg:151.99ms step:2927/3200 train_loss:3.2950 train_time:443357ms step_avg:151.99ms step:2928/3200 train_loss:3.2869 train_time:443506ms step_avg:151.99ms step:2929/3200 train_loss:3.2937 train_time:443658ms step_avg:151.99ms step:2930/3200 train_loss:3.2539 train_time:443807ms step_avg:151.99ms step:2931/3200 train_loss:3.2862 train_time:443958ms step_avg:151.99ms step:2932/3200 train_loss:3.4126 train_time:444107ms step_avg:151.99ms step:2933/3200 train_loss:3.4539 train_time:444258ms step_avg:151.99ms step:2934/3200 
train_loss:3.4292 train_time:444408ms step_avg:151.99ms step:2935/3200 train_loss:3.2670 train_time:444558ms step_avg:151.99ms step:2936/3200 train_loss:3.3189 train_time:444709ms step_avg:151.99ms step:2937/3200 train_loss:3.2735 train_time:444861ms step_avg:151.99ms step:2938/3200 train_loss:3.2934 train_time:445013ms step_avg:151.99ms step:2939/3200 train_loss:3.3155 train_time:445162ms step_avg:151.98ms step:2940/3200 train_loss:3.3541 train_time:445314ms step_avg:151.98ms step:2941/3200 train_loss:3.3975 train_time:445463ms step_avg:151.98ms step:2942/3200 train_loss:3.3865 train_time:445615ms step_avg:151.98ms step:2943/3200 train_loss:3.3158 train_time:445766ms step_avg:151.98ms step:2944/3200 train_loss:3.1949 train_time:445917ms step_avg:151.98ms step:2945/3200 train_loss:3.1415 train_time:446067ms step_avg:151.98ms step:2946/3200 train_loss:3.3398 train_time:446217ms step_avg:151.98ms step:2947/3200 train_loss:3.4108 train_time:446367ms step_avg:151.98ms step:2948/3200 train_loss:3.3426 train_time:446517ms step_avg:151.98ms step:2949/3200 train_loss:3.5262 train_time:446666ms step_avg:151.98ms step:2950/3200 train_loss:3.3382 train_time:446818ms step_avg:151.98ms step:2951/3200 train_loss:3.3437 train_time:446968ms step_avg:151.98ms step:2952/3200 train_loss:3.7396 train_time:447118ms step_avg:151.98ms step:2953/3200 train_loss:3.4170 train_time:447266ms step_avg:151.98ms step:2954/3200 train_loss:3.3605 train_time:447418ms step_avg:151.98ms step:2955/3200 train_loss:3.3817 train_time:447569ms step_avg:151.98ms step:2956/3200 train_loss:3.3108 train_time:447720ms step_avg:151.98ms step:2957/3200 train_loss:3.3273 train_time:447871ms step_avg:151.98ms step:2958/3200 train_loss:3.2142 train_time:448019ms step_avg:151.97ms step:2959/3200 train_loss:3.2938 train_time:448169ms step_avg:151.97ms step:2960/3200 train_loss:3.4370 train_time:448319ms step_avg:151.97ms step:2961/3200 train_loss:3.2384 train_time:448470ms step_avg:151.97ms step:2962/3200 
train_loss:3.3621 train_time:448621ms step_avg:151.97ms step:2963/3200 train_loss:3.2290 train_time:448772ms step_avg:151.97ms step:2964/3200 train_loss:3.2886 train_time:448921ms step_avg:151.97ms step:2965/3200 train_loss:3.2680 train_time:449072ms step_avg:151.97ms step:2966/3200 train_loss:3.3712 train_time:449221ms step_avg:151.97ms step:2967/3200 train_loss:3.2577 train_time:449374ms step_avg:151.97ms step:2968/3200 train_loss:3.4965 train_time:449523ms step_avg:151.97ms step:2969/3200 train_loss:3.3559 train_time:449676ms step_avg:151.97ms step:2970/3200 train_loss:3.3679 train_time:449824ms step_avg:151.97ms step:2971/3200 train_loss:3.3404 train_time:449976ms step_avg:151.97ms step:2972/3200 train_loss:3.4213 train_time:450125ms step_avg:151.97ms step:2973/3200 train_loss:3.2532 train_time:450276ms step_avg:151.97ms step:2974/3200 train_loss:3.2550 train_time:450425ms step_avg:151.97ms step:2975/3200 train_loss:3.1827 train_time:450576ms step_avg:151.97ms step:2976/3200 train_loss:3.2572 train_time:450725ms step_avg:151.96ms step:2977/3200 train_loss:3.2393 train_time:450877ms step_avg:151.96ms step:2978/3200 train_loss:3.2662 train_time:451026ms step_avg:151.96ms step:2979/3200 train_loss:3.5489 train_time:451176ms step_avg:151.96ms step:2980/3200 train_loss:3.3498 train_time:451325ms step_avg:151.96ms step:2981/3200 train_loss:3.3874 train_time:451476ms step_avg:151.96ms step:2982/3200 train_loss:3.4091 train_time:451626ms step_avg:151.96ms step:2983/3200 train_loss:3.4789 train_time:451777ms step_avg:151.96ms step:2984/3200 train_loss:3.2913 train_time:451928ms step_avg:151.96ms step:2985/3200 train_loss:3.3809 train_time:452077ms step_avg:151.96ms step:2986/3200 train_loss:3.3874 train_time:452225ms step_avg:151.96ms step:2987/3200 train_loss:3.3334 train_time:452376ms step_avg:151.96ms step:2988/3200 train_loss:3.4536 train_time:452526ms step_avg:151.96ms step:2989/3200 train_loss:3.0477 train_time:452676ms step_avg:151.96ms step:2990/3200 
train_loss:3.3952 train_time:452826ms step_avg:151.96ms step:2991/3200 train_loss:3.3578 train_time:452977ms step_avg:151.95ms step:2992/3200 train_loss:3.3138 train_time:453127ms step_avg:151.95ms step:2993/3200 train_loss:3.2476 train_time:453276ms step_avg:151.95ms step:2994/3200 train_loss:3.3913 train_time:453426ms step_avg:151.95ms step:2995/3200 train_loss:3.2100 train_time:453577ms step_avg:151.95ms step:2996/3200 train_loss:3.2270 train_time:453728ms step_avg:151.95ms step:2997/3200 train_loss:3.3073 train_time:453878ms step_avg:151.95ms step:2998/3200 train_loss:3.2423 train_time:454028ms step_avg:151.95ms step:2999/3200 train_loss:3.3669 train_time:454177ms step_avg:151.95ms step:3000/3200 train_loss:3.2738 train_time:454327ms step_avg:151.95ms step:3000/3200 val_loss:3.3039 train_time:454373ms step_avg:151.96ms step:3001/3200 train_loss:3.2582 train_time:454485ms step_avg:151.95ms step:3002/3200 train_loss:3.2116 train_time:454636ms step_avg:151.95ms step:3003/3200 train_loss:3.2433 train_time:454785ms step_avg:151.95ms step:3004/3200 train_loss:3.3789 train_time:454933ms step_avg:151.95ms step:3005/3200 train_loss:3.7168 train_time:455082ms step_avg:151.95ms step:3006/3200 train_loss:3.2860 train_time:455231ms step_avg:151.95ms step:3007/3200 train_loss:3.3561 train_time:455381ms step_avg:151.95ms step:3008/3200 train_loss:3.1573 train_time:455535ms step_avg:151.95ms step:3009/3200 train_loss:3.3824 train_time:455687ms step_avg:151.95ms step:3010/3200 train_loss:3.2832 train_time:455836ms step_avg:151.95ms step:3011/3200 train_loss:3.3428 train_time:455986ms step_avg:151.94ms step:3012/3200 train_loss:3.3332 train_time:456134ms step_avg:151.94ms step:3013/3200 train_loss:3.2203 train_time:456285ms step_avg:151.94ms step:3014/3200 train_loss:3.4232 train_time:456434ms step_avg:151.94ms step:3015/3200 train_loss:3.3847 train_time:456587ms step_avg:151.94ms step:3016/3200 train_loss:3.2439 train_time:456735ms step_avg:151.94ms step:3017/3200 
train_loss:3.2950 train_time:456886ms step_avg:151.94ms step:3018/3200 train_loss:3.3327 train_time:457035ms step_avg:151.94ms step:3019/3200 train_loss:3.3750 train_time:457186ms step_avg:151.94ms step:3020/3200 train_loss:3.1543 train_time:457335ms step_avg:151.94ms step:3021/3200 train_loss:3.4616 train_time:457487ms step_avg:151.94ms step:3022/3200 train_loss:3.2879 train_time:457637ms step_avg:151.94ms step:3023/3200 train_loss:3.2066 train_time:457788ms step_avg:151.94ms step:3024/3200 train_loss:3.3100 train_time:458094ms step_avg:151.99ms step:3025/3200 train_loss:3.2801 train_time:458252ms step_avg:151.99ms step:3026/3200 train_loss:3.3501 train_time:458401ms step_avg:151.99ms step:3027/3200 train_loss:3.3706 train_time:458549ms step_avg:151.99ms step:3028/3200 train_loss:3.2730 train_time:458696ms step_avg:151.99ms step:3029/3200 train_loss:3.0754 train_time:458847ms step_avg:151.99ms step:3030/3200 train_loss:3.4126 train_time:458997ms step_avg:151.99ms step:3031/3200 train_loss:3.1717 train_time:459152ms step_avg:151.99ms step:3032/3200 train_loss:3.1673 train_time:459304ms step_avg:151.99ms step:3033/3200 train_loss:3.5021 train_time:459453ms step_avg:151.99ms step:3034/3200 train_loss:3.5068 train_time:459604ms step_avg:151.99ms step:3035/3200 train_loss:3.2692 train_time:459752ms step_avg:151.98ms step:3036/3200 train_loss:3.3513 train_time:459902ms step_avg:151.98ms step:3037/3200 train_loss:3.2961 train_time:460052ms step_avg:151.98ms step:3038/3200 train_loss:3.1988 train_time:460205ms step_avg:151.98ms step:3039/3200 train_loss:3.2500 train_time:460354ms step_avg:151.98ms step:3040/3200 train_loss:3.3471 train_time:460671ms step_avg:152.04ms step:3041/3200 train_loss:3.3374 train_time:460829ms step_avg:152.04ms step:3042/3200 train_loss:3.1361 train_time:460977ms step_avg:152.04ms step:3043/3200 train_loss:3.2941 train_time:461126ms step_avg:152.04ms step:3044/3200 train_loss:3.3242 train_time:461273ms step_avg:152.03ms step:3045/3200 
train_loss:3.3289 train_time:461424ms step_avg:152.03ms step:3046/3200 train_loss:3.4042 train_time:461574ms step_avg:152.03ms step:3047/3200 train_loss:3.2259 train_time:461728ms step_avg:152.03ms step:3048/3200 train_loss:3.3556 train_time:461879ms step_avg:152.03ms step:3049/3200 train_loss:3.3032 train_time:462030ms step_avg:152.03ms step:3050/3200 train_loss:3.2254 train_time:462180ms step_avg:152.03ms step:3051/3200 train_loss:3.3508 train_time:462329ms step_avg:152.03ms step:3052/3200 train_loss:3.1959 train_time:462475ms step_avg:152.03ms step:3053/3200 train_loss:3.4375 train_time:462628ms step_avg:152.03ms step:3054/3200 train_loss:3.3900 train_time:462780ms step_avg:152.03ms step:3055/3200 train_loss:3.3603 train_time:462932ms step_avg:152.03ms step:3056/3200 train_loss:3.3666 train_time:463085ms step_avg:152.03ms step:3057/3200 train_loss:3.2493 train_time:463235ms step_avg:152.03ms step:3058/3200 train_loss:3.2668 train_time:463385ms step_avg:152.03ms step:3059/3200 train_loss:3.3423 train_time:463533ms step_avg:152.03ms step:3060/3200 train_loss:3.2595 train_time:463685ms step_avg:152.03ms step:3061/3200 train_loss:3.3115 train_time:463835ms step_avg:152.03ms step:3062/3200 train_loss:3.3141 train_time:463987ms step_avg:152.03ms step:3063/3200 train_loss:3.2616 train_time:464137ms step_avg:152.03ms step:3064/3200 train_loss:3.2247 train_time:464288ms step_avg:152.03ms step:3065/3200 train_loss:3.2471 train_time:464435ms step_avg:152.02ms step:3066/3200 train_loss:3.2250 train_time:464585ms step_avg:152.02ms step:3067/3200 train_loss:3.2144 train_time:464735ms step_avg:152.02ms step:3068/3200 train_loss:3.1755 train_time:464887ms step_avg:152.02ms step:3069/3200 train_loss:3.2166 train_time:465036ms step_avg:152.02ms step:3070/3200 train_loss:3.2011 train_time:465187ms step_avg:152.02ms step:3071/3200 train_loss:3.3954 train_time:465335ms step_avg:152.02ms step:3072/3200 train_loss:3.3226 train_time:465486ms step_avg:152.02ms step:3073/3200 
train_loss:3.3698 train_time:465635ms step_avg:152.02ms step:3074/3200 train_loss:3.3568 train_time:465787ms step_avg:152.02ms step:3075/3200 train_loss:3.2963 train_time:465936ms step_avg:152.02ms step:3076/3200 train_loss:3.3525 train_time:466089ms step_avg:152.02ms step:3077/3200 train_loss:3.4058 train_time:466239ms step_avg:152.02ms step:3078/3200 train_loss:3.2105 train_time:466388ms step_avg:152.02ms step:3079/3200 train_loss:3.7341 train_time:466537ms step_avg:152.02ms step:3080/3200 train_loss:3.2924 train_time:466689ms step_avg:152.02ms step:3081/3200 train_loss:3.2555 train_time:466838ms step_avg:152.02ms step:3082/3200 train_loss:3.4103 train_time:466989ms step_avg:152.01ms step:3083/3200 train_loss:3.2111 train_time:467139ms step_avg:152.01ms step:3084/3200 train_loss:3.2473 train_time:467289ms step_avg:152.01ms step:3085/3200 train_loss:3.3028 train_time:467437ms step_avg:152.01ms step:3086/3200 train_loss:3.3936 train_time:467587ms step_avg:152.01ms step:3087/3200 train_loss:3.3045 train_time:467738ms step_avg:152.01ms step:3088/3200 train_loss:3.2154 train_time:467888ms step_avg:152.01ms step:3089/3200 train_loss:3.3661 train_time:468039ms step_avg:152.01ms step:3090/3200 train_loss:3.2237 train_time:468190ms step_avg:152.01ms step:3091/3200 train_loss:3.4864 train_time:468340ms step_avg:152.01ms step:3092/3200 train_loss:4.0606 train_time:468490ms step_avg:152.01ms step:3093/3200 train_loss:3.3239 train_time:468640ms step_avg:152.01ms step:3094/3200 train_loss:3.2137 train_time:468791ms step_avg:152.01ms step:3095/3200 train_loss:3.1725 train_time:468943ms step_avg:152.01ms step:3096/3200 train_loss:3.3335 train_time:469092ms step_avg:152.01ms step:3097/3200 train_loss:3.4773 train_time:469245ms step_avg:152.01ms step:3098/3200 train_loss:3.2424 train_time:469394ms step_avg:152.01ms step:3099/3200 train_loss:3.2757 train_time:469546ms step_avg:152.01ms step:3100/3200 train_loss:3.4499 train_time:469696ms step_avg:152.01ms step:3101/3200 
train_loss:3.3568 train_time:469847ms step_avg:152.00ms step:3102/3200 train_loss:3.3514 train_time:469997ms step_avg:152.00ms step:3103/3200 train_loss:3.2651 train_time:470147ms step_avg:152.00ms step:3104/3200 train_loss:3.5147 train_time:470297ms step_avg:152.00ms step:3105/3200 train_loss:3.3391 train_time:470447ms step_avg:152.00ms step:3106/3200 train_loss:3.1896 train_time:470597ms step_avg:152.00ms step:3107/3200 train_loss:3.2184 train_time:470748ms step_avg:152.00ms step:3108/3200 train_loss:3.1725 train_time:470899ms step_avg:152.00ms step:3109/3200 train_loss:3.3999 train_time:471048ms step_avg:152.00ms step:3110/3200 train_loss:3.2888 train_time:471198ms step_avg:152.00ms step:3111/3200 train_loss:3.3173 train_time:471348ms step_avg:152.00ms step:3112/3200 train_loss:3.2990 train_time:471497ms step_avg:152.00ms step:3113/3200 train_loss:3.3488 train_time:471648ms step_avg:152.00ms step:3114/3200 train_loss:3.3126 train_time:471799ms step_avg:152.00ms step:3115/3200 train_loss:3.3029 train_time:471951ms step_avg:152.00ms step:3116/3200 train_loss:3.3445 train_time:472101ms step_avg:152.00ms step:3117/3200 train_loss:3.1939 train_time:472250ms step_avg:152.00ms step:3118/3200 train_loss:3.2240 train_time:472401ms step_avg:152.00ms step:3119/3200 train_loss:3.3898 train_time:472551ms step_avg:151.99ms step:3120/3200 train_loss:3.3757 train_time:472702ms step_avg:151.99ms step:3121/3200 train_loss:3.1694 train_time:472852ms step_avg:151.99ms step:3122/3200 train_loss:3.3660 train_time:473003ms step_avg:151.99ms step:3123/3200 train_loss:3.4177 train_time:473153ms step_avg:151.99ms step:3124/3200 train_loss:3.3847 train_time:473304ms step_avg:151.99ms step:3125/3200 train_loss:3.1826 train_time:473453ms step_avg:151.99ms step:3125/3200 val_loss:3.2889 train_time:473501ms step_avg:152.01ms step:3126/3200 train_loss:3.2672 train_time:473614ms step_avg:151.99ms step:3127/3200 train_loss:3.2882 train_time:473767ms step_avg:151.99ms step:3128/3200 
train_loss:3.3864 train_time:473917ms step_avg:151.99ms step:3129/3200 train_loss:3.4634 train_time:474067ms step_avg:151.99ms step:3130/3200 train_loss:3.1614 train_time:474215ms step_avg:151.99ms step:3131/3200 train_loss:3.3286 train_time:474364ms step_avg:151.99ms step:3132/3200 train_loss:3.3256 train_time:474514ms step_avg:151.99ms step:3133/3200 train_loss:3.3547 train_time:474668ms step_avg:151.99ms step:3134/3200 train_loss:3.2376 train_time:474820ms step_avg:151.99ms step:3135/3200 train_loss:3.3642 train_time:474970ms step_avg:151.99ms step:3136/3200 train_loss:3.2716 train_time:475119ms step_avg:151.99ms step:3137/3200 train_loss:3.3449 train_time:475269ms step_avg:151.99ms step:3138/3200 train_loss:3.5299 train_time:475418ms step_avg:151.99ms step:3139/3200 train_loss:3.5093 train_time:475570ms step_avg:151.99ms step:3140/3200 train_loss:3.2685 train_time:475720ms step_avg:151.99ms step:3141/3200 train_loss:3.2911 train_time:475872ms step_avg:151.99ms step:3142/3200 train_loss:3.2195 train_time:476022ms step_avg:151.99ms step:3143/3200 train_loss:3.3070 train_time:476173ms step_avg:151.99ms step:3144/3200 train_loss:3.1090 train_time:476322ms step_avg:151.99ms step:3145/3200 train_loss:3.3459 train_time:476472ms step_avg:151.98ms step:3146/3200 train_loss:3.2622 train_time:476621ms step_avg:151.98ms step:3147/3200 train_loss:3.2777 train_time:476773ms step_avg:151.98ms step:3148/3200 train_loss:3.4472 train_time:476925ms step_avg:151.98ms step:3149/3200 train_loss:3.5287 train_time:477075ms step_avg:151.98ms step:3150/3200 train_loss:3.4096 train_time:477226ms step_avg:151.98ms step:3151/3200 train_loss:3.2133 train_time:477376ms step_avg:151.98ms step:3152/3200 train_loss:3.2694 train_time:477528ms step_avg:151.98ms step:3153/3200 train_loss:3.2356 train_time:477677ms step_avg:151.98ms step:3154/3200 train_loss:3.3632 train_time:477830ms step_avg:151.98ms step:3155/3200 train_loss:3.1754 train_time:477981ms step_avg:151.98ms step:3156/3200 
train_loss:3.3058 train_time:478132ms step_avg:151.98ms step:3157/3200 train_loss:3.2520 train_time:478281ms step_avg:151.98ms step:3158/3200 train_loss:3.3898 train_time:478431ms step_avg:151.98ms step:3159/3200 train_loss:3.4358 train_time:478582ms step_avg:151.98ms step:3160/3200 train_loss:3.2891 train_time:478732ms step_avg:151.98ms step:3161/3200 train_loss:3.3505 train_time:478882ms step_avg:151.98ms step:3162/3200 train_loss:3.4370 train_time:479033ms step_avg:151.98ms step:3163/3200 train_loss:3.3363 train_time:479185ms step_avg:151.98ms step:3164/3200 train_loss:3.3860 train_time:479335ms step_avg:151.98ms step:3165/3200 train_loss:3.2121 train_time:479487ms step_avg:151.98ms step:3166/3200 train_loss:3.1968 train_time:479637ms step_avg:151.98ms step:3167/3200 train_loss:3.2371 train_time:479789ms step_avg:151.98ms step:3168/3200 train_loss:3.0568 train_time:479938ms step_avg:151.98ms step:3169/3200 train_loss:3.2305 train_time:480089ms step_avg:151.98ms step:3170/3200 train_loss:3.3637 train_time:480239ms step_avg:151.97ms step:3171/3200 train_loss:3.3806 train_time:480390ms step_avg:151.97ms step:3172/3200 train_loss:3.3572 train_time:480540ms step_avg:151.97ms step:3173/3200 train_loss:3.3304 train_time:480690ms step_avg:151.97ms step:3174/3200 train_loss:3.2975 train_time:480839ms step_avg:151.97ms step:3175/3200 train_loss:3.2956 train_time:480991ms step_avg:151.97ms step:3176/3200 train_loss:3.2839 train_time:481140ms step_avg:151.97ms step:3177/3200 train_loss:3.2312 train_time:481292ms step_avg:151.97ms step:3178/3200 train_loss:3.3463 train_time:481442ms step_avg:151.97ms step:3179/3200 train_loss:3.4445 train_time:481592ms step_avg:151.97ms step:3180/3200 train_loss:3.2691 train_time:481742ms step_avg:151.97ms step:3181/3200 train_loss:3.2653 train_time:481893ms step_avg:151.97ms step:3182/3200 train_loss:3.3078 train_time:482044ms step_avg:151.97ms step:3183/3200 train_loss:3.4044 train_time:482193ms step_avg:151.97ms step:3184/3200 
train_loss:3.4199 train_time:482345ms step_avg:151.97ms step:3185/3200 train_loss:3.3179 train_time:482495ms step_avg:151.97ms step:3186/3200 train_loss:3.3915 train_time:482647ms step_avg:151.97ms step:3187/3200 train_loss:3.3810 train_time:482796ms step_avg:151.97ms step:3188/3200 train_loss:3.1759 train_time:482948ms step_avg:151.97ms step:3189/3200 train_loss:3.2669 train_time:483097ms step_avg:151.97ms step:3190/3200 train_loss:3.2891 train_time:483249ms step_avg:151.97ms step:3191/3200 train_loss:3.3126 train_time:483398ms step_avg:151.96ms step:3192/3200 train_loss:3.2715 train_time:483550ms step_avg:151.96ms step:3193/3200 train_loss:3.2024 train_time:483699ms step_avg:151.96ms step:3194/3200 train_loss:4.2154 train_time:483850ms step_avg:151.96ms step:3195/3200 train_loss:3.3089 train_time:484000ms step_avg:151.96ms step:3196/3200 train_loss:3.1160 train_time:484150ms step_avg:151.96ms step:3197/3200 train_loss:3.2529 train_time:484299ms step_avg:151.96ms step:3198/3200 train_loss:3.1268 train_time:484451ms step_avg:151.96ms step:3199/3200 train_loss:3.2616 train_time:484600ms step_avg:151.96ms step:3200/3200 train_loss:3.1962 train_time:484752ms step_avg:151.96ms step:3200/3200 val_loss:3.2844 train_time:484797ms step_avg:151.97ms