====================================================================================================
import os
import sys
with open(sys.argv[0]) as f:
    code = f.read() # read the code of this file ASAP, for logging
import uuid
import glob
import time
from dataclasses import dataclass

import numpy as np
import torch
from torch import nn
import torch.nn.functional as F
import torch.distributed as dist
import torch._inductor.config as config
from torch.nn.parallel import DistributedDataParallel as DDP

# -----------------------------------------------------------------------------
# Muon optimizer

def zeropower_via_svd(G, steps=None):
    """
    Orthogonalize G exactly as U @ V.T, where U, S, V = G.svd().

    `steps` is accepted only so this backend can be called the same way as the
    iterative one (Muon.step passes `steps=` to whichever backend is selected);
    it is not used here.
    """
    U, S, V = G.svd()
    return U @ V.T

@torch.compile
def zeropower_via_newtonschulz5(G, steps=10, eps=1e-7):
    r"""
    Newton-Schulz iteration to compute the zeroth power / orthogonalization of G. We opt to use a
    quintic iteration whose coefficients are selected to maximize the slope at zero. For the purpose
    of minimizing steps, it turns out to be empirically effective to keep increasing the slope at
    zero even beyond the point where the iteration no longer converges all the way to one everywhere
    on the interval. This iteration therefore does not produce UV^T but rather something like US'V^T
    where S' is diagonal with S_{ii}' \sim Uniform(0.5, 1.5), which turns out not to hurt model
    performance at all relative to UV^T, where USV^T = G is the SVD.
    """
    assert len(G.shape) == 2
    a, b, c = (3.4445, -4.7750, 2.0315)
    X = G.bfloat16()
    X /= (X.norm() + eps) # ensure top singular value <= 1
    # work on the wide orientation (rows <= cols); transposed back after the loop
    if G.size(0) > G.size(1):
        X = X.T
    for _ in range(steps):
        A = X @ X.T
        B = A @ X
        X = a * X + b * B + c * A @ B
    if G.size(0) > G.size(1):
        X = X.T
    return X

# name -> orthogonalization function; selected through Muon's `backend` argument
zeropower_backends = dict(svd=zeropower_via_svd, newtonschulz5=zeropower_via_newtonschulz5)

class Muon(torch.optim.Optimizer):
    """
    Muon - MomentUm Orthogonalized by Newton-schulz

    Muon internally runs standard SGD-momentum, and then performs an orthogonalization post-
    processing step, in which each 2D parameter's update is replaced with the nearest orthogonal
    matrix. To efficiently orthogonalize each update, we use a Newton-Schulz iteration, which has
    the advantage that it can be stably run in bfloat16 on the GPU.

    Some warnings:
    - This optimizer assumes that all parameters passed in are 2D.
    - It should not be used for the embedding layer, the final fully connected layer, or any {0,1}-D
    parameters; those should all be optimized by a standard method (e.g., AdamW).
    - To use it with 4D convolutional filters, it works well to just flatten their last 3 dimensions.
    - We believe it is unlikely to work well for training with small batch size.
    - We believe it may not work well for finetuning pretrained models, but we haven't tested this.
    - We have not yet tried this optimizer for training scenarios larger than NanoGPT (124M).

    Arguments:
        lr: The learning rate used by the internal SGD.
        momentum: The momentum used by the internal SGD.
        nesterov: Whether to use Nesterov-style momentum in the internal SGD. (recommended)
        backend: The chosen backend for the orthogonalization step. (recommended: 'newtonschulz5')
        backend_steps: The number of iteration steps to use in the backend, if it is iterative.
    """
    def __init__(self, params, lr=0.02, momentum=0.95, nesterov=True, backend='newtonschulz5', backend_steps=5):
        defaults = dict(lr=lr, momentum=momentum, nesterov=nesterov, backend=backend, backend_steps=backend_steps)
        super().__init__(params, defaults)

    def step(self):
        """
        Apply one Muon update to every parameter in every param group.

        The work is split across processes: the parameter at index i of a group is
        handled by the process whose RANK equals i % WORLD_SIZE (both read from the
        environment). Each process writes its updates into its slice of a flat
        zero-initialized buffer, and a SUM all-reduce then gives every process the
        full set of updates, which are applied as p -= lr * update.
        """

        for group in self.param_groups:

            lr = group['lr']
            momentum = group['momentum']
            zeropower_backend = zeropower_backends[group['backend']]

            # generate weight updates in distributed fashion
            total_params = sum(p.numel() for p in group['params'])
            updates_flat = torch.zeros(total_params, device='cuda', dtype=torch.bfloat16)
            curr_idx = 0
            for i, p in enumerate(group['params']):
                # luckily this will perfectly distribute a transformer with multiple of 4 layers to 8 GPUs
                if i % int(os.environ['WORLD_SIZE']) == int(os.environ['RANK']):
                    g = p.grad
                    assert g is not None
                    state = self.state[p]
                    if 'momentum_buffer' not in state:
                        state['momentum_buffer'] = torch.zeros_like(g)
                    buf = state['momentum_buffer']
                    buf.mul_(momentum).add_(g)
                    if group['nesterov']:
                        g = g.add(buf, alpha=momentum)
                    g = zeropower_backend(g, steps=group['backend_steps'])
                    # scale up by sqrt(rows/cols) for tall matrices; wide/square ones are left as is
                    g *= max(1, g.size(0)/g.size(1))**0.5
                    updates_flat[curr_idx:curr_idx+p.numel()] = g.flatten()
                # the offset advances for every parameter, including those owned by other ranks
                curr_idx += p.numel()

            # sync updates across devices. we are not memory-constrained so can do this simple deserialization
            dist.all_reduce(updates_flat, op=dist.ReduceOp.SUM)

            # deserialize and apply updates
            curr_idx = 0
            for p in group['params']:
                g = updates_flat[curr_idx:curr_idx+p.numel()].view_as(p.data).type_as(p.data)
                p.data.add_(g, alpha=-lr)
                curr_idx += p.numel()

# -----------------------------------------------------------------------------
# PyTorch nn.Module definitions for the GPT-2 model

class Rotary(torch.nn.Module):
    """
    Produces the cos/sin tables used by apply_rotary_emb.

    The tables are cached on the module and recomputed only when the sequence
    length (x.shape[1]) differs from the one they were last built for.
    """

    def __init__(self, dim, base=10000):
        super().__init__()
        self.inv_freq = 1.0 / (base ** (torch.arange(0, dim, 2).float() / dim))
        self.seq_len_cached = None
        self.cos_cached = None
        self.sin_cached = None

    def forward(self, x):
        """Return (cos, sin), each indexed as [None, :, None, :] so they broadcast against a 4D input."""
        seq_len = x.shape[1]
        if seq_len != self.seq_len_cached:
            self.seq_len_cached = seq_len
            t = torch.arange(seq_len, device=x.device).type_as(self.inv_freq)
            freqs = torch.outer(t, self.inv_freq).to(x.device)
            self.cos_cached = freqs.cos().bfloat16()
            self.sin_cached = freqs.sin().bfloat16()
        return self.cos_cached[None, :, None, :], self.sin_cached[None, :, None, :]

def apply_rotary_emb(x, cos, sin):
    """
    Rotate the two halves of x's last dimension by the given cos/sin tables.

    x must be 4D. The result is cast back to x's dtype.
    """
    assert x.ndim == 4 # multihead attention
    d = x.shape[3]//2
    x1 = x[..., :d]
    x2 = x[..., d:]
    y1 = x1 * cos + x2 * sin
    y2 = x1 * (-sin) + x2 * cos
    return torch.cat([y1, y2], 3).type_as(x)

class CausalSelfAttention(nn.Module):
    """
    Causal multi-head self-attention with RMS-normalized, rotary-embedded q/k and a
    learnable blend (self.lamb) between this layer's values and the values `v1`
    handed in by the caller.
    """

    def __init__(self, config):
        super().__init__()
        self.n_head = config.n_head
        self.n_embd = config.n_embd
        self.head_dim = self.n_embd // self.n_head
        assert self.n_embd % self.n_head == 0
        self.c_q = nn.Linear(self.n_embd, self.n_embd, bias=False)
        self.c_k = nn.Linear(self.n_embd, self.n_embd, bias=False)
        self.c_v = nn.Linear(self.n_embd, self.n_embd, bias=False)
        # output projection
        self.c_proj = nn.Linear(self.n_embd, self.n_embd, bias=False)
        self.c_proj.weight.data.zero_() # zero init suggested by @Grad62304977
        self.rotary = Rotary(self.head_dim)
        self.lamb = nn.Parameter(torch.tensor(0.5)) # @Grad62304977

    def forward(self, x, v1=None):
        """
        Return (y, v1): the attention output with the same shape as x, and the value
        tensor to pass on to later blocks (this layer's own v when v1 was None).
        """
        B, T, C = x.size() # batch size, sequence length, embedding dimensionality (n_embd)
        q = self.c_q(x).view(B, T, self.n_head, self.head_dim)
        k = self.c_k(x).view(B, T, self.n_head, self.head_dim)
        v = self.c_v(x).view(B, T, self.n_head, self.head_dim)
        if v1 is None:
            v1 = v # This happens if we are in the first block. v needs to be accessed by subsequent blocks
        v = (1 - self.lamb) * v + self.lamb * v1.view_as(v) # @Grad62304977
        cos, sin = self.rotary(q)
        q, k = F.rms_norm(q, (q.size(-1),)), F.rms_norm(k, (k.size(-1),)) # QK norm suggested by @Grad62304977
        q, k = apply_rotary_emb(q, cos, sin), apply_rotary_emb(k, cos, sin)
        y = F.scaled_dot_product_attention(q.transpose(1, 2), k.transpose(1, 2), v.transpose(1, 2), is_causal=True)
        y = y.transpose(1, 2).contiguous().view_as(x) # re-assemble all head outputs side by side
        y = self.c_proj(y)
        return y, v1

class MLP(nn.Module):
    """Two bias-free linear layers (n_embd -> 4*n_embd -> n_embd) with a squared-ReLU in between."""

    def __init__(self, config):
        super().__init__()
        self.c_fc    = nn.Linear(config.n_embd, 4 * config.n_embd, bias=False)
        self.c_proj  = nn.Linear(4 * config.n_embd, config.n_embd, bias=False)
        self.c_proj.weight.data.zero_() # zero init suggested by @Grad62304977

    def forward(self, x):
        x = self.c_fc(x)
        x = F.relu(x).square() # https://arxiv.org/abs/2109.08668v2; ~1-2% better than GELU; suggested by @SKYLINEZ007 and @Grad62304977
        x = self.c_proj(x)
        return x

class Block(nn.Module):
    """One transformer layer: attention then MLP, each applied to an RMS-normalized input and added residually."""

    def __init__(self, config):
        super().__init__()
        self.attn = CausalSelfAttention(config)
        self.mlp = MLP(config)

    def forward(self, x, v1):
        """Return (x, v1); v1 is whatever the attention layer returned, to be fed to the next block."""
        x1, v1 = self.attn(F.rms_norm(x, (x.size(-1),)), v1)
        x = x + x1
        x = x + self.mlp(F.rms_norm(x, (x.size(-1),)))
        return x, v1

# -----------------------------------------------------------------------------
# The main GPT-2 model

@dataclass
class GPTConfig:
    vocab_size : int = 50304
    n_layer : int = 12
    n_head : int = 6 # head dim 128 suggested by @Grad62304977
    n_embd : int = 768

class GPT(nn.Module):
    """Token embedding, a stack of `n_layer` Blocks, a final RMS norm and a zero-initialized bias-free lm_head."""

    def __init__(self, config):
        super().__init__()
        self.config = config

        self.transformer = nn.ModuleDict(dict(
            wte = nn.Embedding(config.vocab_size, config.n_embd),
            h = nn.ModuleList([Block(config) for _ in range(config.n_layer)]),
        ))
        self.lm_head = nn.Linear(config.n_embd, config.vocab_size, bias=False)
        self.lm_head.weight.data.zero_() # @Grad62304977

    def forward(self, idx, targets=None, return_logits=True):
        """
        Return (logits, loss).

        With `targets`, logits cover every position and loss is the cross-entropy
        (targets equal to -1 are ignored). Without `targets`, only the last
        position is projected and loss is None. When `return_logits` is False the
        returned logits are None.
        """

        # forward the GPT model itself
        x = self.transformer.wte(idx) # token embeddings of shape (b, t, n_embd)
        x = F.rms_norm(x, (x.size(-1),)) # @Grad62304977
        v1 = None # filled in by the first block and then threaded through the rest
        for block in self.transformer.h:
            x, v1 = block(x, v1)
        x = F.rms_norm(x, (x.size(-1),))

        if targets is not None:
            # if we are given some desired targets also calculate the loss
            logits = self.lm_head(x)
            logits = logits.float() # use tf32/fp32 for logits
            loss = F.cross_entropy(logits.view(-1, logits.size(-1)), targets.view(-1), ignore_index=-1)
        else:
            # inference-time mini-optimization: only forward the lm_head on the very last position
            logits = self.lm_head(x[:, [-1], :]) # note: using list [-1] to preserve the time dim
            logits = logits.float() # use tf32/fp32 for logits
            loss = None

        # there are performance reasons why not returning logits is prudent, if not needed
        if not return_logits:
            logits = None

        return logits, loss

# -----------------------------------------------------------------------------
# Our own simple Distributed Data Loader

def _peek_data_shard(filename):
    """Read only the 256-int32 header of a shard, validate it, and return the token count it claims."""
    # only reads the header, returns header data
    with open(filename, "rb") as f:
        # first read the header, which is 256 int32 integers (4 bytes each)
        header = np.frombuffer(f.read(256*4), dtype=np.int32)
    if header[0] != 20240520:
        print("ERROR: magic number mismatch in the data .bin file!")
        print("---> HINT: Are you passing in a correct file with --input_bin?")
        print("---> HINT: Dataset encoding changed recently, re-run data prepro or refer again to README")
        print("---> HINT: For example re-run: `python dev/data/tinyshakespeare.py`, then re-try")
        exit(1)
    assert header[1] == 1, "unsupported version"
    ntok = header[2] # number of tokens (claimed)
    return ntok # for now just return the number of tokens

def _load_data_shard(filename):
    """Read a whole shard and return its tokens as a uint16 array, checking the count against the header."""
    with open(filename, "rb") as f:
        # first read the header, which is 256 int32 integers (4 bytes each)
        header = np.frombuffer(f.read(256*4), dtype=np.int32)
        assert header[0] == 20240520, "magic number mismatch in the data .bin file"
        assert header[1] == 1, "unsupported version"
        ntok = header[2] # number of tokens (claimed)
        # the rest of it are tokens, stored as uint16
        tokens = np.frombuffer(f.read(), dtype=np.uint16)
    assert len(tokens) == ntok, "number of tokens read does not match header?"
    return tokens

class DistributedDataLoader:
    """
    Serves (B, T) input/target batches from the shards matching a glob pattern.

    Each process starts at offset process_rank * B * T within a shard and moves
    forward by B * T * num_processes per batch. When another full step would not
    fit in the current shard, the loader moves to the next shard, wrapping around
    after the last one.
    """
    def __init__(self, filename_pattern, B, T, process_rank, num_processes):
        self.process_rank = process_rank
        self.num_processes = num_processes
        self.B = B
        self.T = T

        # glob files that match the pattern
        self.files = sorted(glob.glob(filename_pattern))
        assert len(self.files) > 0, f"did not find any files that match the pattern {filename_pattern}"

        # load and validate all data shards, count number of tokens in total
        ntok_total = 0
        for fname in self.files:
            shard_ntok = _peek_data_shard(fname)
            # every shard must hold at least one batch per process, plus one token for the shifted targets
            assert shard_ntok >= num_processes * B * T + 1
            ntok_total += int(shard_ntok)
        self.ntok_total = ntok_total

        # kick things off
        self.reset()

    def reset(self):
        """Go back to the first shard and this process's starting offset."""
        self.current_shard = 0
        self.current_position = self.process_rank * self.B * self.T
        self.tokens = _load_data_shard(self.files[self.current_shard])

    def advance(self): # advance to next data shard
        self.current_shard = (self.current_shard + 1) % len(self.files)
        self.current_position = self.process_rank * self.B * self.T
        self.tokens = _load_data_shard(self.files[self.current_shard])

    def next_batch(self):
        """Return (x, y) on the GPU: B*T tokens viewed as (B, T), and the same window shifted by one token."""
        B = self.B
        T = self.T
        buf = self.tokens[self.current_position : self.current_position+B*T+1]
        buf = torch.tensor(buf.astype(np.int32), dtype=torch.long)
        x = (buf[:-1]).view(B, T) # inputs
        y = (buf[1:]).view(B, T) # targets
        # advance current position and load next shard if necessary
        self.current_position += B * T * self.num_processes
        if self.current_position + (B * T * self.num_processes + 1) > len(self.tokens):
            self.advance()
        return x.cuda(), y.cuda()

# -----------------------------------------------------------------------------
# int main

@dataclass
class Hyperparameters:
    # data hyperparams
    input_bin : str = 'data/fineweb10B/fineweb_train_*.bin' # input .bin to train on
    input_val_bin : str = 'data/fineweb10B/fineweb_val_*.bin' # input .bin to eval validation loss on
    # optimization hyperparams
    batch_size : int = 8*64 # batch size, in sequences, across all devices
    device_batch_size : int = 64 # batch size, in sequences, per device
    sequence_length : int = 1024 # sequence length, in tokens
    num_iterations : int = 3200 # number of iterations to run
    warmup_iters : int = 0
    warmdown_iters : int = 914 # number of iterations of linear warmup/warmdown for triangular or trapezoidal schedule
    weight_decay : float = 0
    # evaluation and logging hyperparams
    val_loss_every : int = 125 # every how many steps to evaluate val loss? 0 for only at the end
    val_tokens : int = 10485760 # how many tokens of validation data? it's important to keep this fixed for consistent comparisons
    save_every : int = 0 # every how many steps to save the checkpoint? 0 for only at the end
args = Hyperparameters()

# set up DDP (distributed data parallel). torchrun sets this env variable
assert torch.cuda.is_available()
dist.init_process_group(backend='nccl')
ddp_rank = int(os.environ['RANK'])
ddp_local_rank = int(os.environ['LOCAL_RANK'])
ddp_world_size = int(os.environ['WORLD_SIZE'])
device = f'cuda:{ddp_local_rank}'
torch.cuda.set_device(device)
print(f"using device: {device}")
master_process = (ddp_rank == 0) # this process will do logging, checkpointing etc.

# convenience variables
B, T = args.device_batch_size, args.sequence_length
# calculate the number of steps to take in the val loop.
assert args.val_tokens % (B * T * ddp_world_size) == 0
val_steps = args.val_tokens // (B * T * ddp_world_size)
# calculate the steps of gradient accumulation required to attain the desired global batch size.
assert args.batch_size % (B * ddp_world_size) == 0
train_accumulation_steps = args.batch_size // (B * ddp_world_size)

# load tokens
train_loader = DistributedDataLoader(args.input_bin, B, T, ddp_rank, ddp_world_size)
val_loader = DistributedDataLoader(args.input_val_bin, B, T, ddp_rank, ddp_world_size)
if master_process:
    print(f"Training DataLoader: total number of tokens: {train_loader.ntok_total} across {len(train_loader.files)} files")
    print(f"Validation DataLoader: total number of tokens: {val_loader.ntok_total} across {len(val_loader.files)} files")
x, y = train_loader.next_batch()

# there are only 50257 unique GPT-2 tokens; we extend to nearest multiple of 128 for efficiency. suggested to me by @Grad62304977.
# this originates from Karpathy's experiments.
num_vocab = 50304
model = GPT(GPTConfig(vocab_size=num_vocab, n_layer=12, n_head=6, n_embd=768))
model = model.cuda()
if hasattr(config, "coordinate_descent_tuning"):
    config.coordinate_descent_tuning = True # suggested by @Chillee
model = torch.compile(model)
# here we wrap model into DDP container
model = DDP(model, device_ids=[ddp_local_rank])
raw_model = model.module # always contains the "raw" unwrapped model
ctx = torch.amp.autocast(device_type='cuda', dtype=torch.bfloat16)

# CUDNN attention is ~4ms faster than Flash, but doesn't get selected by default in PyTorch 2.5.1
from torch.backends.cuda import enable_cudnn_sdp, enable_flash_sdp, enable_math_sdp, enable_mem_efficient_sdp
enable_cudnn_sdp(True)
enable_flash_sdp(False)
enable_mem_efficient_sdp(False)
enable_math_sdp(False)

# init the optimizer(s)
# embedding and lm_head each get their own Adam; inside the blocks, 2D params go to Muon and <2D params to Adam
optimizer1 = torch.optim.Adam([raw_model.transformer.wte.weight], lr=0.3,   betas=(0.9, 0.95), fused=True)
optimizer2 = torch.optim.Adam([raw_model.lm_head.weight],         lr=0.002, betas=(0.9, 0.95), fused=True)
params = list(raw_model.transformer.h.parameters())
matrix_params = [p for p in params if p.ndim == 2]
scalar_params = [p for p in params if p.ndim < 2]
optimizer3 = Muon(matrix_params, lr=0.02, momentum=0.95)
optimizer4 = torch.optim.Adam(scalar_params, lr=0.02, betas=(0.9, 0.95), fused=True) # note that this learning rate is neither sensitive nor tuned
optimizers = [optimizer1, optimizer2, optimizer3, optimizer4]
# learning rate decay scheduler (linear warmup and warmdown)
def get_lr(it):
    """
    Return the learning-rate multiplier for iteration `it`, as used by LambdaLR:
    a linear ramp during the first warmup_iters, 1.0 in the middle, and a linear
    decay over the last warmdown_iters that reaches 0 at it == num_iterations.
    """
    # NOTE(review): the final branch divides by args.warmdown_iters, so with
    # warmdown_iters == 0 the call at it == num_iterations would raise
    # ZeroDivisionError. Not reachable with the value configured above (914).
    assert it <= args.num_iterations
    # 1) linear warmup for warmup_iters steps
    if it < args.warmup_iters:
        return (it+1) / args.warmup_iters
    # 2) constant lr for a while
    elif it < args.num_iterations - args.warmdown_iters:
        return 1.0
    # 3) linear warmdown
    else:
        decay_ratio = (args.num_iterations - it) / args.warmdown_iters
        return decay_ratio
schedulers = [torch.optim.lr_scheduler.LambdaLR(opt, get_lr) for opt in optimizers]

# begin logging
if master_process:
    run_id = str(uuid.uuid4())
    logdir = 'logs/%s/' % run_id
    os.makedirs(logdir, exist_ok=True)
    logfile = 'logs/%s.txt' % run_id
    # create the log file
    with open(logfile, "w") as f:
        # begin the log by printing this file (the Python code)
        f.write('='*100 + '\n')
        f.write(code)
        f.write('='*100 + '\n')
        # log information about the hardware/software environment this is running on
        # and print the full `nvidia-smi` to file
        f.write(f"Running pytorch {torch.version.__version__} compiled for CUDA {torch.version.cuda}\nnvidia-smi:\n")
        import subprocess
        result = subprocess.run(['nvidia-smi'], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
        f.write(f'{result.stdout}\n')
        f.write('='*100 + '\n')

training_time_ms = 0
# start the clock
torch.cuda.synchronize()
t0 = time.time()
# begin training
train_loader.reset()
for step in range(args.num_iterations + 1):
    last_step = (step == args.num_iterations)
    # This effectively ignores timing first 10 steps, which are slower for weird reasons.
    # Alternately, and slightly more correctly in terms of benchmarking, we could do 10
    # steps with dummy data first, and then re-initialize the model and reset the loader.
    if step == 10:
        training_time_ms = 0
        t0 = time.time()
    # NaN until the clock has been reset and a couple of steps have run, so step_avg prints as "nan" early on
    timed_steps = float('nan') if step <= 11 else (step - 10) + 1 # <= 11 to avoid bug in val

    # once in a while evaluate the validation dataset
    if (last_step or (args.val_loss_every > 0 and step % args.val_loss_every == 0)):
        # stop the clock
        torch.cuda.synchronize()
        training_time_ms += 1000 * (time.time() - t0)
        # run validation batches
        model.eval()
        val_loader.reset()
        val_loss = 0.0
        for _ in range(val_steps):
            x_val, y_val = val_loader.next_batch()
            with ctx: # of course, we'd like to use no_grad() here too, but that creates a torch.compile error for some reason
                _, loss = model(x_val, y_val, return_logits=False)
                val_loss += loss.detach()
                del loss
        # average across processes first, then across the val_steps batches summed above
        dist.all_reduce(val_loss, op=dist.ReduceOp.AVG)
        val_loss /= val_steps
        # log val loss to console and to logfile
        if master_process:
            print(f'step:{step}/{args.num_iterations} val_loss:{val_loss:.4f} train_time:{training_time_ms:.0f}ms step_avg:{training_time_ms/(timed_steps-1):.2f}ms')
            with open(logfile, "a") as f:
                f.write(f'step:{step}/{args.num_iterations} val_loss:{val_loss:.4f} train_time:{training_time_ms:.0f}ms step_avg:{training_time_ms/(timed_steps-1):.2f}ms\n')
        # start the clock again
        torch.cuda.synchronize()
        t0 = time.time()

    if master_process and (last_step or (args.save_every > 0 and step % args.save_every == 0)):
        # stop the clock
        torch.cuda.synchronize()
        training_time_ms += 1000 * (time.time() - t0)
        # save the state of the training process
        log = dict(step=step, code=code, model=raw_model.state_dict(), optimizers=[opt.state_dict() for opt in optimizers])
        torch.save(log, 'logs/%s/state_step%06d.pt' % (run_id, step))
        # start the clock again
        torch.cuda.synchronize()
        t0 = time.time()

    # bit confusing: we want to make sure to eval on 0th iteration
    # but also after the very last iteration. so we loop for step <= num_iterations
    # instead of just < num_iterations (one extra due to <=), only to do
    # the validation/sampling one last time, and then we break right here as we're done.
    if last_step:
        break

    # --------------- TRAINING SECTION BEGIN -----------------
    model.train()
    for i in range(1, train_accumulation_steps+1):
        # forward pass
        with ctx:
            _, loss = model(x, y, return_logits=False)
            train_loss = loss.detach()
        # advance the dataset for the next batch
        x, y = train_loader.next_batch()
        # backward pass
        if i < train_accumulation_steps:
            with model.no_sync(): # there's no need to sync gradients every accumulation step
                loss.backward()
        else:
            loss.backward() # just sync on the last step
    # turn the gradients summed over the accumulation steps into their mean
    for p in model.parameters():
        p.grad /= train_accumulation_steps
    # step the optimizers and schedulers
    for opt, sched in zip(optimizers, schedulers):
        opt.step()
        sched.step()
    # null the gradients
    model.zero_grad(set_to_none=True)
    # --------------- TRAINING SECTION END -------------------
    # everything that follows now is just diagnostics, prints, logging, etc.

    #dist.all_reduce(train_loss, op=dist.ReduceOp.AVG) # all-reducing the training loss would be more correct in terms of logging, but slower
    if master_process:
        # the clock is not stopped here, so this is elapsed wall time without a CUDA synchronize
        approx_time = training_time_ms + 1000 * (time.time() - t0)
        print(f"step:{step+1}/{args.num_iterations} train_loss:{train_loss.item():.4f} train_time:{approx_time:.0f}ms step_avg:{approx_time/timed_steps:.2f}ms")
        with open(logfile, "a") as f:
            f.write(f"step:{step+1}/{args.num_iterations} train_loss:{train_loss.item():.4f} train_time:{approx_time:.0f}ms step_avg:{approx_time/timed_steps:.2f}ms\n")

if master_process:
    print(f"peak memory consumption: {torch.cuda.max_memory_allocated() // 1024 // 1024} MiB")

# -------------------------------------------------------------------------
# clean up nice
dist.destroy_process_group()
====================================================================================================
Running pytorch 2.5.1+cu124 compiled for CUDA 12.4
nvidia-smi:
Wed Nov 6 20:00:03 2024
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 555.42.06 Driver Version: 555.42.06 CUDA Version: 12.5 |
|-----------------------------------------+------------------------+----------------------+
| GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC |
| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. |
| | | MIG M.
| |=========================================+========================+======================| | 0 NVIDIA H100 80GB HBM3 Off | 00000000:18:00.0 Off | 0 | | N/A 33C P0 142W / 700W | 5304MiB / 81559MiB | 5% Default | | | | Disabled | +-----------------------------------------+------------------------+----------------------+ | 1 NVIDIA H100 80GB HBM3 Off | 00000000:2A:00.0 Off | 0 | | N/A 34C P0 130W / 700W | 5352MiB / 81559MiB | 1% Default | | | | Disabled | +-----------------------------------------+------------------------+----------------------+ | 2 NVIDIA H100 80GB HBM3 Off | 00000000:3A:00.0 Off | 0 | | N/A 34C P0 126W / 700W | 5352MiB / 81559MiB | 2% Default | | | | Disabled | +-----------------------------------------+------------------------+----------------------+ | 3 NVIDIA H100 80GB HBM3 Off | 00000000:5D:00.0 Off | 0 | | N/A 32C P0 137W / 700W | 5352MiB / 81559MiB | 3% Default | | | | Disabled | +-----------------------------------------+------------------------+----------------------+ | 4 NVIDIA H100 80GB HBM3 Off | 00000000:9A:00.0 Off | 0 | | N/A 34C P0 142W / 700W | 5352MiB / 81559MiB | 1% Default | | | | Disabled | +-----------------------------------------+------------------------+----------------------+ | 5 NVIDIA H100 80GB HBM3 Off | 00000000:AB:00.0 Off | 0 | | N/A 36C P0 142W / 700W | 5352MiB / 81559MiB | 5% Default | | | | Disabled | +-----------------------------------------+------------------------+----------------------+ | 6 NVIDIA H100 80GB HBM3 Off | 00000000:BA:00.0 Off | 0 | | N/A 35C P0 142W / 700W | 5352MiB / 81559MiB | 10% Default | | | | Disabled | +-----------------------------------------+------------------------+----------------------+ | 7 NVIDIA H100 80GB HBM3 Off | 00000000:DB:00.0 Off | 0 | | N/A 34C P0 147W / 700W | 5112MiB / 81559MiB | 5% Default | | | | Disabled | +-----------------------------------------+------------------------+----------------------+ 
+-----------------------------------------------------------------------------------------+ | Processes: | | GPU GI CI PID Type Process name GPU Memory | | ID ID Usage | |=========================================================================================| | 0 N/A N/A 30667 C /usr/bin/python3 0MiB | | 1 N/A N/A 30668 C /usr/bin/python3 0MiB | | 2 N/A N/A 30669 C /usr/bin/python3 0MiB | | 3 N/A N/A 30670 C /usr/bin/python3 0MiB | | 4 N/A N/A 30671 C /usr/bin/python3 0MiB | | 5 N/A N/A 30672 C /usr/bin/python3 0MiB | | 6 N/A N/A 30673 C /usr/bin/python3 0MiB | | 7 N/A N/A 30674 C /usr/bin/python3 0MiB | +-----------------------------------------------------------------------------------------+ ==================================================================================================== step:0/3200 val_loss:10.8258 train_time:468ms step_avg:nanms step:1/3200 train_loss:10.8258 train_time:5057ms step_avg:nanms step:2/3200 train_loss:10.4277 train_time:5152ms step_avg:nanms step:3/3200 train_loss:9.9264 train_time:5290ms step_avg:nanms step:4/3200 train_loss:9.0400 train_time:5431ms step_avg:nanms step:5/3200 train_loss:8.1044 train_time:5575ms step_avg:nanms step:6/3200 train_loss:7.5537 train_time:5715ms step_avg:nanms step:7/3200 train_loss:7.0321 train_time:5857ms step_avg:nanms step:8/3200 train_loss:7.2060 train_time:6002ms step_avg:nanms step:9/3200 train_loss:6.8985 train_time:6150ms step_avg:nanms step:10/3200 train_loss:6.7505 train_time:6295ms step_avg:nanms step:11/3200 train_loss:6.7331 train_time:92ms step_avg:nanms step:12/3200 train_loss:6.6795 train_time:238ms step_avg:nanms step:13/3200 train_loss:6.5057 train_time:378ms step_avg:126.05ms step:14/3200 train_loss:6.4959 train_time:521ms step_avg:130.29ms step:15/3200 train_loss:6.4821 train_time:665ms step_avg:133.10ms step:16/3200 train_loss:6.4385 train_time:812ms step_avg:135.30ms step:17/3200 train_loss:6.4396 train_time:958ms step_avg:136.81ms step:18/3200 train_loss:6.4996 
train_time:1102ms step_avg:137.71ms step:19/3200 train_loss:6.3301 train_time:1246ms step_avg:138.42ms step:20/3200 train_loss:6.3491 train_time:1388ms step_avg:138.79ms step:21/3200 train_loss:6.0631 train_time:1531ms step_avg:139.14ms step:22/3200 train_loss:6.3816 train_time:1674ms step_avg:139.51ms step:23/3200 train_loss:6.6105 train_time:1821ms step_avg:140.05ms step:24/3200 train_loss:6.2722 train_time:1966ms step_avg:140.41ms step:25/3200 train_loss:6.4143 train_time:2109ms step_avg:140.62ms step:26/3200 train_loss:6.1295 train_time:2255ms step_avg:140.91ms step:27/3200 train_loss:6.0467 train_time:2399ms step_avg:141.12ms step:28/3200 train_loss:6.2120 train_time:2544ms step_avg:141.31ms step:29/3200 train_loss:5.8839 train_time:2686ms step_avg:141.39ms step:30/3200 train_loss:6.1378 train_time:2832ms step_avg:141.59ms step:31/3200 train_loss:5.9845 train_time:2975ms step_avg:141.69ms step:32/3200 train_loss:5.9490 train_time:3121ms step_avg:141.85ms step:33/3200 train_loss:5.7895 train_time:3264ms step_avg:141.93ms step:34/3200 train_loss:6.0959 train_time:3408ms step_avg:142.00ms step:35/3200 train_loss:6.0088 train_time:3552ms step_avg:142.09ms step:36/3200 train_loss:6.1516 train_time:3696ms step_avg:142.17ms step:37/3200 train_loss:6.0706 train_time:3841ms step_avg:142.27ms step:38/3200 train_loss:5.9719 train_time:3987ms step_avg:142.38ms step:39/3200 train_loss:5.8612 train_time:4131ms step_avg:142.44ms step:40/3200 train_loss:5.8743 train_time:4274ms step_avg:142.48ms step:41/3200 train_loss:5.7887 train_time:4419ms step_avg:142.55ms step:42/3200 train_loss:5.7989 train_time:4563ms step_avg:142.58ms step:43/3200 train_loss:5.7009 train_time:4708ms step_avg:142.66ms step:44/3200 train_loss:5.7826 train_time:4853ms step_avg:142.72ms step:45/3200 train_loss:5.7654 train_time:4998ms step_avg:142.80ms step:46/3200 train_loss:5.9062 train_time:5142ms step_avg:142.84ms step:47/3200 train_loss:5.6970 train_time:5286ms step_avg:142.86ms step:48/3200 
train_loss:5.5626 train_time:5430ms step_avg:142.90ms step:49/3200 train_loss:5.7682 train_time:5573ms step_avg:142.91ms step:50/3200 train_loss:5.6482 train_time:5719ms step_avg:142.98ms step:51/3200 train_loss:5.7993 train_time:5868ms step_avg:143.12ms step:52/3200 train_loss:5.6592 train_time:6008ms step_avg:143.05ms step:53/3200 train_loss:5.5113 train_time:6153ms step_avg:143.09ms step:54/3200 train_loss:5.6285 train_time:6298ms step_avg:143.14ms step:55/3200 train_loss:5.5081 train_time:6451ms step_avg:143.36ms step:56/3200 train_loss:5.8624 train_time:6587ms step_avg:143.20ms step:57/3200 train_loss:5.5021 train_time:6732ms step_avg:143.23ms step:58/3200 train_loss:5.3862 train_time:6879ms step_avg:143.31ms step:59/3200 train_loss:5.5283 train_time:7024ms step_avg:143.34ms step:60/3200 train_loss:5.4716 train_time:7166ms step_avg:143.33ms step:61/3200 train_loss:5.5766 train_time:7310ms step_avg:143.33ms step:62/3200 train_loss:5.3544 train_time:7454ms step_avg:143.34ms step:63/3200 train_loss:5.4468 train_time:7598ms step_avg:143.36ms step:64/3200 train_loss:5.4201 train_time:7742ms step_avg:143.37ms step:65/3200 train_loss:5.1822 train_time:7887ms step_avg:143.39ms step:66/3200 train_loss:5.2321 train_time:8032ms step_avg:143.42ms step:67/3200 train_loss:5.3875 train_time:8175ms step_avg:143.43ms step:68/3200 train_loss:5.2623 train_time:8320ms step_avg:143.45ms step:69/3200 train_loss:5.5007 train_time:8464ms step_avg:143.46ms step:70/3200 train_loss:5.1601 train_time:8608ms step_avg:143.47ms step:71/3200 train_loss:5.2258 train_time:8763ms step_avg:143.66ms step:72/3200 train_loss:5.3815 train_time:8899ms step_avg:143.53ms step:73/3200 train_loss:5.3199 train_time:9046ms step_avg:143.58ms step:74/3200 train_loss:5.2052 train_time:9188ms step_avg:143.56ms step:75/3200 train_loss:5.3241 train_time:9333ms step_avg:143.58ms step:76/3200 train_loss:5.3018 train_time:9478ms step_avg:143.60ms step:77/3200 train_loss:5.2375 train_time:9623ms step_avg:143.63ms 
step:78/3200 train_loss:5.3337 train_time:9767ms step_avg:143.63ms step:79/3200 train_loss:5.4228 train_time:9912ms step_avg:143.65ms step:80/3200 train_loss:5.1899 train_time:10057ms step_avg:143.67ms step:81/3200 train_loss:5.2829 train_time:10201ms step_avg:143.67ms step:82/3200 train_loss:5.0479 train_time:10346ms step_avg:143.70ms step:83/3200 train_loss:5.2235 train_time:10488ms step_avg:143.67ms step:84/3200 train_loss:5.1736 train_time:10633ms step_avg:143.69ms step:85/3200 train_loss:5.1595 train_time:10779ms step_avg:143.72ms step:86/3200 train_loss:5.0162 train_time:10924ms step_avg:143.73ms step:87/3200 train_loss:5.2165 train_time:11067ms step_avg:143.73ms step:88/3200 train_loss:5.1293 train_time:11211ms step_avg:143.73ms step:89/3200 train_loss:5.1791 train_time:11355ms step_avg:143.73ms step:90/3200 train_loss:5.1493 train_time:11500ms step_avg:143.75ms step:91/3200 train_loss:5.0613 train_time:11644ms step_avg:143.75ms step:92/3200 train_loss:5.0640 train_time:11789ms step_avg:143.77ms step:93/3200 train_loss:5.1943 train_time:11933ms step_avg:143.77ms step:94/3200 train_loss:5.0149 train_time:12077ms step_avg:143.78ms step:95/3200 train_loss:5.0093 train_time:12223ms step_avg:143.80ms step:96/3200 train_loss:5.0600 train_time:12364ms step_avg:143.77ms step:97/3200 train_loss:4.9582 train_time:12508ms step_avg:143.77ms step:98/3200 train_loss:5.0319 train_time:12652ms step_avg:143.77ms step:99/3200 train_loss:4.9648 train_time:12797ms step_avg:143.78ms step:100/3200 train_loss:5.0792 train_time:12941ms step_avg:143.78ms step:101/3200 train_loss:5.0468 train_time:13085ms step_avg:143.80ms step:102/3200 train_loss:4.9287 train_time:13230ms step_avg:143.80ms step:103/3200 train_loss:5.0547 train_time:13372ms step_avg:143.79ms step:104/3200 train_loss:5.0019 train_time:13516ms step_avg:143.79ms step:105/3200 train_loss:4.8779 train_time:13661ms step_avg:143.80ms step:106/3200 train_loss:4.9291 train_time:13805ms step_avg:143.80ms step:107/3200 
train_loss:5.1277 train_time:13949ms step_avg:143.81ms step:108/3200 train_loss:4.9079 train_time:14093ms step_avg:143.81ms step:109/3200 train_loss:4.7063 train_time:14239ms step_avg:143.83ms step:110/3200 train_loss:4.8790 train_time:14384ms step_avg:143.84ms step:111/3200 train_loss:4.8645 train_time:14528ms step_avg:143.84ms step:112/3200 train_loss:4.8285 train_time:14672ms step_avg:143.84ms step:113/3200 train_loss:4.9542 train_time:14816ms step_avg:143.84ms step:114/3200 train_loss:4.8570 train_time:14962ms step_avg:143.87ms step:115/3200 train_loss:4.7215 train_time:15107ms step_avg:143.87ms step:116/3200 train_loss:4.8736 train_time:15250ms step_avg:143.87ms step:117/3200 train_loss:4.7912 train_time:15394ms step_avg:143.87ms step:118/3200 train_loss:4.7368 train_time:15539ms step_avg:143.88ms step:119/3200 train_loss:4.8955 train_time:15682ms step_avg:143.87ms step:120/3200 train_loss:4.8291 train_time:15826ms step_avg:143.87ms step:121/3200 train_loss:4.7426 train_time:15970ms step_avg:143.87ms step:122/3200 train_loss:4.6608 train_time:16116ms step_avg:143.89ms step:123/3200 train_loss:4.7758 train_time:16263ms step_avg:143.92ms step:124/3200 train_loss:4.6403 train_time:16407ms step_avg:143.92ms step:125/3200 train_loss:4.9455 train_time:16556ms step_avg:143.96ms step:125/3200 val_loss:4.7642 train_time:16601ms step_avg:144.36ms step:126/3200 train_loss:4.8066 train_time:16707ms step_avg:144.02ms step:127/3200 train_loss:4.7642 train_time:16855ms step_avg:144.06ms step:128/3200 train_loss:4.7975 train_time:17000ms step_avg:144.07ms step:129/3200 train_loss:4.6926 train_time:17142ms step_avg:144.05ms step:130/3200 train_loss:4.9961 train_time:17285ms step_avg:144.04ms step:131/3200 train_loss:4.7391 train_time:17427ms step_avg:144.03ms step:132/3200 train_loss:4.7290 train_time:17570ms step_avg:144.01ms step:133/3200 train_loss:4.6729 train_time:17718ms step_avg:144.05ms step:134/3200 train_loss:4.7517 train_time:17865ms step_avg:144.08ms step:135/3200 
train_loss:4.6157 train_time:18009ms step_avg:144.07ms step:136/3200 train_loss:4.7401 train_time:18151ms step_avg:144.06ms step:137/3200 train_loss:4.5167 train_time:18294ms step_avg:144.05ms step:138/3200 train_loss:4.6880 train_time:18438ms step_avg:144.04ms step:139/3200 train_loss:4.6146 train_time:18581ms step_avg:144.04ms step:140/3200 train_loss:4.6610 train_time:18725ms step_avg:144.04ms step:141/3200 train_loss:4.7275 train_time:18873ms step_avg:144.07ms step:142/3200 train_loss:4.6006 train_time:19019ms step_avg:144.08ms step:143/3200 train_loss:4.6243 train_time:19161ms step_avg:144.07ms step:144/3200 train_loss:4.5182 train_time:19304ms step_avg:144.06ms step:145/3200 train_loss:4.6322 train_time:19446ms step_avg:144.04ms step:146/3200 train_loss:4.5798 train_time:19589ms step_avg:144.04ms step:147/3200 train_loss:4.4703 train_time:19733ms step_avg:144.04ms step:148/3200 train_loss:4.6088 train_time:19880ms step_avg:144.06ms step:149/3200 train_loss:4.6189 train_time:20025ms step_avg:144.06ms step:150/3200 train_loss:4.5942 train_time:20170ms step_avg:144.07ms step:151/3200 train_loss:4.6858 train_time:20313ms step_avg:144.06ms step:152/3200 train_loss:4.5439 train_time:20456ms step_avg:144.05ms step:153/3200 train_loss:4.5385 train_time:20599ms step_avg:144.05ms step:154/3200 train_loss:4.6081 train_time:20742ms step_avg:144.04ms step:155/3200 train_loss:4.5871 train_time:20887ms step_avg:144.05ms step:156/3200 train_loss:4.5276 train_time:21031ms step_avg:144.05ms step:157/3200 train_loss:4.5703 train_time:21175ms step_avg:144.05ms step:158/3200 train_loss:4.6589 train_time:21319ms step_avg:144.05ms step:159/3200 train_loss:4.4782 train_time:21462ms step_avg:144.04ms step:160/3200 train_loss:4.5373 train_time:21606ms step_avg:144.04ms step:161/3200 train_loss:4.3517 train_time:21748ms step_avg:144.03ms step:162/3200 train_loss:4.5520 train_time:21892ms step_avg:144.03ms step:163/3200 train_loss:4.5658 train_time:22037ms step_avg:144.03ms 
step:164/3200 train_loss:4.5513 train_time:22182ms step_avg:144.04ms step:165/3200 train_loss:4.4000 train_time:22325ms step_avg:144.03ms step:166/3200 train_loss:4.4844 train_time:22470ms step_avg:144.04ms step:167/3200 train_loss:4.5818 train_time:22613ms step_avg:144.03ms step:168/3200 train_loss:4.4028 train_time:22758ms step_avg:144.04ms step:169/3200 train_loss:4.4828 train_time:22902ms step_avg:144.04ms step:170/3200 train_loss:4.3751 train_time:23046ms step_avg:144.04ms step:171/3200 train_loss:4.2514 train_time:23190ms step_avg:144.04ms step:172/3200 train_loss:4.3896 train_time:23333ms step_avg:144.03ms step:173/3200 train_loss:4.4060 train_time:23478ms step_avg:144.04ms step:174/3200 train_loss:4.4554 train_time:23622ms step_avg:144.03ms step:175/3200 train_loss:4.6129 train_time:23766ms step_avg:144.03ms step:176/3200 train_loss:4.4438 train_time:23909ms step_avg:144.03ms step:177/3200 train_loss:4.2996 train_time:24052ms step_avg:144.03ms step:178/3200 train_loss:4.2647 train_time:24198ms step_avg:144.04ms step:179/3200 train_loss:4.3607 train_time:24341ms step_avg:144.03ms step:180/3200 train_loss:4.3360 train_time:24484ms step_avg:144.03ms step:181/3200 train_loss:4.3026 train_time:24628ms step_avg:144.02ms step:182/3200 train_loss:4.4704 train_time:24772ms step_avg:144.03ms step:183/3200 train_loss:4.3440 train_time:24917ms step_avg:144.03ms step:184/3200 train_loss:4.3161 train_time:25061ms step_avg:144.03ms step:185/3200 train_loss:4.3086 train_time:25205ms step_avg:144.03ms step:186/3200 train_loss:4.3975 train_time:25348ms step_avg:144.02ms step:187/3200 train_loss:4.3532 train_time:25491ms step_avg:144.02ms step:188/3200 train_loss:4.4424 train_time:25634ms step_avg:144.01ms step:189/3200 train_loss:4.3477 train_time:25973ms step_avg:145.10ms step:190/3200 train_loss:4.2782 train_time:26318ms step_avg:146.21ms step:191/3200 train_loss:4.3825 train_time:26457ms step_avg:146.17ms step:192/3200 train_loss:4.2537 train_time:26601ms 
step_avg:146.16ms step:193/3200 train_loss:4.1915 train_time:26743ms step_avg:146.14ms step:194/3200 train_loss:4.4087 train_time:26885ms step_avg:146.11ms step:195/3200 train_loss:4.3274 train_time:27027ms step_avg:146.09ms step:196/3200 train_loss:4.5295 train_time:27171ms step_avg:146.08ms step:197/3200 train_loss:4.3629 train_time:27319ms step_avg:146.09ms step:198/3200 train_loss:4.2076 train_time:27463ms step_avg:146.08ms step:199/3200 train_loss:4.3324 train_time:27606ms step_avg:146.06ms step:200/3200 train_loss:4.1907 train_time:27750ms step_avg:146.05ms step:201/3200 train_loss:4.2851 train_time:27892ms step_avg:146.03ms step:202/3200 train_loss:4.1604 train_time:28035ms step_avg:146.01ms step:203/3200 train_loss:4.3980 train_time:28179ms step_avg:146.01ms step:204/3200 train_loss:4.2326 train_time:28325ms step_avg:146.00ms step:205/3200 train_loss:4.3410 train_time:28468ms step_avg:145.99ms step:206/3200 train_loss:4.4017 train_time:28613ms step_avg:145.98ms step:207/3200 train_loss:4.1046 train_time:28757ms step_avg:145.97ms step:208/3200 train_loss:4.2434 train_time:28900ms step_avg:145.96ms step:209/3200 train_loss:4.2389 train_time:29043ms step_avg:145.94ms step:210/3200 train_loss:4.3931 train_time:29186ms step_avg:145.93ms step:211/3200 train_loss:4.3154 train_time:29329ms step_avg:145.92ms step:212/3200 train_loss:4.2099 train_time:29474ms step_avg:145.91ms step:213/3200 train_loss:4.2405 train_time:29619ms step_avg:145.91ms step:214/3200 train_loss:4.1837 train_time:29762ms step_avg:145.89ms step:215/3200 train_loss:4.2573 train_time:29905ms step_avg:145.88ms step:216/3200 train_loss:4.0767 train_time:30049ms step_avg:145.87ms step:217/3200 train_loss:4.1491 train_time:30193ms step_avg:145.86ms step:218/3200 train_loss:4.1548 train_time:30338ms step_avg:145.85ms step:219/3200 train_loss:4.2218 train_time:30482ms step_avg:145.85ms step:220/3200 train_loss:4.2124 train_time:30626ms step_avg:145.84ms step:221/3200 train_loss:4.2362 
train_time:30770ms step_avg:145.83ms step:222/3200 train_loss:4.2478 train_time:30914ms step_avg:145.82ms step:223/3200 train_loss:4.1670 train_time:31059ms step_avg:145.82ms step:224/3200 train_loss:4.1225 train_time:31202ms step_avg:145.80ms step:225/3200 train_loss:4.4274 train_time:31345ms step_avg:145.79ms step:226/3200 train_loss:4.0384 train_time:31489ms step_avg:145.78ms step:227/3200 train_loss:4.1239 train_time:31632ms step_avg:145.77ms step:228/3200 train_loss:4.1380 train_time:31776ms step_avg:145.76ms step:229/3200 train_loss:4.2763 train_time:31951ms step_avg:145.89ms step:230/3200 train_loss:4.0614 train_time:32066ms step_avg:145.76ms step:231/3200 train_loss:4.1888 train_time:32209ms step_avg:145.74ms step:232/3200 train_loss:4.0396 train_time:32350ms step_avg:145.72ms step:233/3200 train_loss:4.1087 train_time:32494ms step_avg:145.71ms step:234/3200 train_loss:4.2369 train_time:32639ms step_avg:145.71ms step:235/3200 train_loss:4.1598 train_time:32784ms step_avg:145.71ms step:236/3200 train_loss:4.0389 train_time:32927ms step_avg:145.69ms step:237/3200 train_loss:4.2103 train_time:33070ms step_avg:145.68ms step:238/3200 train_loss:4.2156 train_time:33214ms step_avg:145.68ms step:239/3200 train_loss:4.0697 train_time:33358ms step_avg:145.67ms step:240/3200 train_loss:4.2118 train_time:33502ms step_avg:145.66ms step:241/3200 train_loss:4.2426 train_time:33645ms step_avg:145.65ms step:242/3200 train_loss:4.0994 train_time:33789ms step_avg:145.64ms step:243/3200 train_loss:4.2790 train_time:33933ms step_avg:145.63ms step:244/3200 train_loss:4.1443 train_time:34077ms step_avg:145.63ms step:245/3200 train_loss:4.1983 train_time:34221ms step_avg:145.62ms step:246/3200 train_loss:4.2746 train_time:34365ms step_avg:145.61ms step:247/3200 train_loss:4.2004 train_time:34509ms step_avg:145.61ms step:248/3200 train_loss:4.1343 train_time:34654ms step_avg:145.60ms step:249/3200 train_loss:4.2455 train_time:34799ms step_avg:145.60ms step:250/3200 
train_loss:4.0521 train_time:34943ms step_avg:145.60ms step:250/3200 val_loss:4.1363 train_time:34994ms step_avg:145.81ms step:251/3200 train_loss:4.0946 train_time:35096ms step_avg:145.63ms step:252/3200 train_loss:4.2044 train_time:35244ms step_avg:145.63ms step:253/3200 train_loss:4.2711 train_time:35387ms step_avg:145.63ms step:254/3200 train_loss:4.0635 train_time:35529ms step_avg:145.61ms step:255/3200 train_loss:4.0113 train_time:35672ms step_avg:145.60ms step:256/3200 train_loss:4.1965 train_time:35815ms step_avg:145.59ms step:257/3200 train_loss:4.1067 train_time:35959ms step_avg:145.58ms step:258/3200 train_loss:4.1241 train_time:36105ms step_avg:145.58ms step:259/3200 train_loss:4.0943 train_time:36248ms step_avg:145.58ms step:260/3200 train_loss:4.1533 train_time:36393ms step_avg:145.57ms step:261/3200 train_loss:4.1807 train_time:36538ms step_avg:145.57ms step:262/3200 train_loss:4.1500 train_time:36681ms step_avg:145.56ms step:263/3200 train_loss:4.1135 train_time:36824ms step_avg:145.55ms step:264/3200 train_loss:4.0244 train_time:36966ms step_avg:145.54ms step:265/3200 train_loss:4.1058 train_time:37110ms step_avg:145.53ms step:266/3200 train_loss:3.9794 train_time:37256ms step_avg:145.53ms step:267/3200 train_loss:4.0327 train_time:37400ms step_avg:145.53ms step:268/3200 train_loss:4.0389 train_time:37543ms step_avg:145.52ms step:269/3200 train_loss:4.0719 train_time:37687ms step_avg:145.51ms step:270/3200 train_loss:3.9799 train_time:37830ms step_avg:145.50ms step:271/3200 train_loss:4.2151 train_time:37973ms step_avg:145.49ms step:272/3200 train_loss:4.1005 train_time:38118ms step_avg:145.49ms step:273/3200 train_loss:4.0259 train_time:38262ms step_avg:145.48ms step:274/3200 train_loss:4.0747 train_time:38406ms step_avg:145.48ms step:275/3200 train_loss:4.1593 train_time:38550ms step_avg:145.47ms step:276/3200 train_loss:4.1782 train_time:38693ms step_avg:145.46ms step:277/3200 train_loss:4.3467 train_time:38838ms step_avg:145.46ms step:278/3200 
train_loss:4.1500 train_time:38981ms step_avg:145.45ms step:279/3200 train_loss:4.2059 train_time:39124ms step_avg:145.44ms step:280/3200 train_loss:4.1152 train_time:39268ms step_avg:145.44ms step:281/3200 train_loss:4.2480 train_time:39412ms step_avg:145.43ms step:282/3200 train_loss:4.0736 train_time:39558ms step_avg:145.43ms step:283/3200 train_loss:4.0794 train_time:39701ms step_avg:145.43ms step:284/3200 train_loss:4.0233 train_time:39845ms step_avg:145.42ms step:285/3200 train_loss:4.1633 train_time:39988ms step_avg:145.41ms step:286/3200 train_loss:4.1767 train_time:40131ms step_avg:145.40ms step:287/3200 train_loss:4.2082 train_time:40276ms step_avg:145.40ms step:288/3200 train_loss:4.0314 train_time:40421ms step_avg:145.40ms step:289/3200 train_loss:4.1322 train_time:40565ms step_avg:145.40ms step:290/3200 train_loss:3.9860 train_time:40708ms step_avg:145.39ms step:291/3200 train_loss:3.9825 train_time:40852ms step_avg:145.38ms step:292/3200 train_loss:4.0611 train_time:40997ms step_avg:145.38ms step:293/3200 train_loss:3.9793 train_time:41141ms step_avg:145.38ms step:294/3200 train_loss:4.0291 train_time:41286ms step_avg:145.37ms step:295/3200 train_loss:4.0647 train_time:41430ms step_avg:145.37ms step:296/3200 train_loss:3.9531 train_time:41574ms step_avg:145.36ms step:297/3200 train_loss:3.9687 train_time:41719ms step_avg:145.36ms step:298/3200 train_loss:3.9720 train_time:41863ms step_avg:145.36ms step:299/3200 train_loss:4.0835 train_time:42006ms step_avg:145.35ms step:300/3200 train_loss:3.9462 train_time:42150ms step_avg:145.34ms step:301/3200 train_loss:4.0784 train_time:42294ms step_avg:145.34ms step:302/3200 train_loss:4.0931 train_time:42439ms step_avg:145.34ms step:303/3200 train_loss:4.0431 train_time:42582ms step_avg:145.33ms step:304/3200 train_loss:4.0887 train_time:42725ms step_avg:145.32ms step:305/3200 train_loss:4.0742 train_time:42869ms step_avg:145.32ms step:306/3200 train_loss:4.5628 train_time:43012ms step_avg:145.31ms 
step:307/3200 train_loss:4.0485 train_time:43158ms step_avg:145.31ms step:308/3200 train_loss:3.9531 train_time:43302ms step_avg:145.31ms step:309/3200 train_loss:4.0959 train_time:43448ms step_avg:145.31ms step:310/3200 train_loss:3.9693 train_time:43591ms step_avg:145.30ms step:311/3200 train_loss:4.1957 train_time:43735ms step_avg:145.30ms step:312/3200 train_loss:4.0348 train_time:43879ms step_avg:145.29ms step:313/3200 train_loss:3.9831 train_time:44023ms step_avg:145.29ms step:314/3200 train_loss:4.0720 train_time:44165ms step_avg:145.28ms step:315/3200 train_loss:4.1955 train_time:44310ms step_avg:145.28ms step:316/3200 train_loss:4.0620 train_time:44455ms step_avg:145.28ms step:317/3200 train_loss:3.9042 train_time:44599ms step_avg:145.27ms step:318/3200 train_loss:3.9847 train_time:44742ms step_avg:145.26ms step:319/3200 train_loss:4.0223 train_time:44884ms step_avg:145.26ms step:320/3200 train_loss:3.9990 train_time:45027ms step_avg:145.25ms step:321/3200 train_loss:4.1167 train_time:45171ms step_avg:145.24ms step:322/3200 train_loss:4.0644 train_time:45315ms step_avg:145.24ms step:323/3200 train_loss:4.0389 train_time:45460ms step_avg:145.24ms step:324/3200 train_loss:4.1195 train_time:45604ms step_avg:145.23ms step:325/3200 train_loss:4.0643 train_time:45748ms step_avg:145.23ms step:326/3200 train_loss:4.1346 train_time:45891ms step_avg:145.22ms step:327/3200 train_loss:3.9962 train_time:46034ms step_avg:145.22ms step:328/3200 train_loss:4.4947 train_time:46180ms step_avg:145.22ms step:329/3200 train_loss:4.1808 train_time:46323ms step_avg:145.21ms step:330/3200 train_loss:3.9225 train_time:46467ms step_avg:145.21ms step:331/3200 train_loss:3.8645 train_time:46610ms step_avg:145.20ms step:332/3200 train_loss:4.0884 train_time:46755ms step_avg:145.20ms step:333/3200 train_loss:4.0186 train_time:46899ms step_avg:145.20ms step:334/3200 train_loss:3.9855 train_time:47043ms step_avg:145.19ms step:335/3200 train_loss:3.9493 train_time:47186ms 
step_avg:145.19ms step:336/3200 train_loss:4.1272 train_time:47330ms step_avg:145.18ms step:337/3200 train_loss:4.0664 train_time:47474ms step_avg:145.18ms step:338/3200 train_loss:4.5378 train_time:47619ms step_avg:145.18ms step:339/3200 train_loss:4.0457 train_time:47762ms step_avg:145.17ms step:340/3200 train_loss:3.9941 train_time:47906ms step_avg:145.17ms step:341/3200 train_loss:4.0387 train_time:48048ms step_avg:145.16ms step:342/3200 train_loss:3.9535 train_time:48194ms step_avg:145.16ms step:343/3200 train_loss:3.9217 train_time:48339ms step_avg:145.16ms step:344/3200 train_loss:3.9524 train_time:48483ms step_avg:145.16ms step:345/3200 train_loss:4.1016 train_time:48627ms step_avg:145.15ms step:346/3200 train_loss:3.9491 train_time:48771ms step_avg:145.15ms step:347/3200 train_loss:3.8796 train_time:48914ms step_avg:145.15ms step:348/3200 train_loss:3.9108 train_time:49059ms step_avg:145.15ms step:349/3200 train_loss:3.9737 train_time:49201ms step_avg:145.14ms step:350/3200 train_loss:3.9346 train_time:49345ms step_avg:145.13ms step:351/3200 train_loss:3.6668 train_time:49489ms step_avg:145.13ms step:352/3200 train_loss:3.9292 train_time:49633ms step_avg:145.12ms step:353/3200 train_loss:4.2656 train_time:49778ms step_avg:145.13ms step:354/3200 train_loss:3.7680 train_time:49923ms step_avg:145.13ms step:355/3200 train_loss:4.0386 train_time:50065ms step_avg:145.12ms step:356/3200 train_loss:3.8970 train_time:50209ms step_avg:145.11ms step:357/3200 train_loss:3.9994 train_time:50353ms step_avg:145.11ms step:358/3200 train_loss:3.9208 train_time:50498ms step_avg:145.11ms step:359/3200 train_loss:3.9512 train_time:50641ms step_avg:145.10ms step:360/3200 train_loss:3.9696 train_time:50784ms step_avg:145.10ms step:361/3200 train_loss:3.5619 train_time:50928ms step_avg:145.09ms step:362/3200 train_loss:4.1305 train_time:51072ms step_avg:145.09ms step:363/3200 train_loss:4.0239 train_time:51215ms step_avg:145.09ms step:364/3200 train_loss:3.9524 
train_time:51361ms step_avg:145.09ms step:365/3200 train_loss:3.8550 train_time:51504ms step_avg:145.08ms step:366/3200 train_loss:4.0231 train_time:51648ms step_avg:145.08ms step:367/3200 train_loss:3.9754 train_time:51791ms step_avg:145.07ms step:368/3200 train_loss:3.9678 train_time:51937ms step_avg:145.07ms step:369/3200 train_loss:3.9537 train_time:52080ms step_avg:145.07ms step:370/3200 train_loss:3.8532 train_time:52223ms step_avg:145.06ms step:371/3200 train_loss:3.9993 train_time:52366ms step_avg:145.06ms step:372/3200 train_loss:3.8615 train_time:52510ms step_avg:145.06ms step:373/3200 train_loss:3.8091 train_time:52654ms step_avg:145.05ms step:374/3200 train_loss:4.0273 train_time:52799ms step_avg:145.05ms step:375/3200 train_loss:3.9503 train_time:52944ms step_avg:145.05ms step:375/3200 val_loss:3.9426 train_time:52996ms step_avg:145.19ms step:376/3200 train_loss:3.9169 train_time:53100ms step_avg:145.08ms step:377/3200 train_loss:3.9748 train_time:53247ms step_avg:145.09ms step:378/3200 train_loss:3.8958 train_time:53555ms step_avg:145.53ms step:379/3200 train_loss:3.9560 train_time:53699ms step_avg:145.53ms step:380/3200 train_loss:3.9758 train_time:54026ms step_avg:146.02ms step:381/3200 train_loss:4.0580 train_time:54166ms step_avg:146.00ms step:382/3200 train_loss:3.9537 train_time:54309ms step_avg:145.99ms step:383/3200 train_loss:3.9264 train_time:54451ms step_avg:145.98ms step:384/3200 train_loss:3.9006 train_time:54593ms step_avg:145.97ms step:385/3200 train_loss:3.9791 train_time:54736ms step_avg:145.96ms step:386/3200 train_loss:3.8962 train_time:54881ms step_avg:145.96ms step:387/3200 train_loss:3.9999 train_time:55029ms step_avg:145.97ms step:388/3200 train_loss:4.1903 train_time:55176ms step_avg:145.97ms step:389/3200 train_loss:3.9170 train_time:55319ms step_avg:145.96ms step:390/3200 train_loss:3.9066 train_time:55463ms step_avg:145.96ms step:391/3200 train_loss:4.0023 train_time:55607ms step_avg:145.95ms step:392/3200 train_loss:3.9261 
train_time:55749ms step_avg:145.94ms step:393/3200 train_loss:4.0350 train_time:55892ms step_avg:145.93ms step:394/3200 train_loss:3.8735 train_time:56037ms step_avg:145.93ms step:395/3200 train_loss:4.0006 train_time:56184ms step_avg:145.93ms step:396/3200 train_loss:3.7410 train_time:56329ms step_avg:145.93ms step:397/3200 train_loss:3.9511 train_time:56472ms step_avg:145.92ms step:398/3200 train_loss:3.9857 train_time:56616ms step_avg:145.92ms step:399/3200 train_loss:3.9919 train_time:56758ms step_avg:145.91ms step:400/3200 train_loss:3.8947 train_time:56901ms step_avg:145.90ms step:401/3200 train_loss:3.9408 train_time:57044ms step_avg:145.89ms step:402/3200 train_loss:4.0204 train_time:57190ms step_avg:145.89ms step:403/3200 train_loss:3.9509 train_time:57334ms step_avg:145.89ms step:404/3200 train_loss:4.0647 train_time:57477ms step_avg:145.88ms step:405/3200 train_loss:3.8076 train_time:57621ms step_avg:145.88ms step:406/3200 train_loss:3.9068 train_time:57766ms step_avg:145.87ms step:407/3200 train_loss:4.2050 train_time:57912ms step_avg:145.87ms step:408/3200 train_loss:3.9067 train_time:58054ms step_avg:145.87ms step:409/3200 train_loss:3.9336 train_time:58198ms step_avg:145.86ms step:410/3200 train_loss:3.9697 train_time:58341ms step_avg:145.85ms step:411/3200 train_loss:3.8631 train_time:58485ms step_avg:145.85ms step:412/3200 train_loss:3.8770 train_time:58630ms step_avg:145.85ms step:413/3200 train_loss:4.2956 train_time:58773ms step_avg:145.84ms step:414/3200 train_loss:3.7372 train_time:58916ms step_avg:145.83ms step:415/3200 train_loss:4.1203 train_time:59058ms step_avg:145.82ms step:416/3200 train_loss:3.8723 train_time:59202ms step_avg:145.82ms step:417/3200 train_loss:3.8779 train_time:59348ms step_avg:145.82ms step:418/3200 train_loss:4.0717 train_time:59493ms step_avg:145.82ms step:419/3200 train_loss:3.8034 train_time:59636ms step_avg:145.81ms step:420/3200 train_loss:3.9175 train_time:59780ms step_avg:145.81ms step:421/3200 
train_loss:3.8313 train_time:59924ms step_avg:145.80ms step:422/3200 train_loss:3.7664 train_time:60068ms step_avg:145.80ms step:423/3200 train_loss:3.8960 train_time:60212ms step_avg:145.79ms step:424/3200 train_loss:3.9860 train_time:60355ms step_avg:145.79ms step:425/3200 train_loss:3.7372 train_time:60499ms step_avg:145.78ms step:426/3200 train_loss:3.9143 train_time:60644ms step_avg:145.78ms step:427/3200 train_loss:3.7974 train_time:60790ms step_avg:145.78ms step:428/3200 train_loss:4.0153 train_time:60933ms step_avg:145.77ms step:429/3200 train_loss:3.9329 train_time:61076ms step_avg:145.77ms step:430/3200 train_loss:3.8735 train_time:61220ms step_avg:145.76ms step:431/3200 train_loss:3.8392 train_time:61365ms step_avg:145.76ms step:432/3200 train_loss:3.7401 train_time:61510ms step_avg:145.76ms step:433/3200 train_loss:3.8837 train_time:61654ms step_avg:145.75ms step:434/3200 train_loss:3.9357 train_time:61797ms step_avg:145.75ms step:435/3200 train_loss:3.8837 train_time:61941ms step_avg:145.74ms step:436/3200 train_loss:3.9256 train_time:62085ms step_avg:145.74ms step:437/3200 train_loss:3.9434 train_time:62230ms step_avg:145.74ms step:438/3200 train_loss:3.8186 train_time:62376ms step_avg:145.74ms step:439/3200 train_loss:3.8309 train_time:62518ms step_avg:145.73ms step:440/3200 train_loss:3.8169 train_time:62662ms step_avg:145.73ms step:441/3200 train_loss:3.9988 train_time:62806ms step_avg:145.72ms step:442/3200 train_loss:3.8808 train_time:62951ms step_avg:145.72ms step:443/3200 train_loss:3.8619 train_time:63094ms step_avg:145.71ms step:444/3200 train_loss:3.7554 train_time:63237ms step_avg:145.71ms step:445/3200 train_loss:4.0259 train_time:63381ms step_avg:145.70ms step:446/3200 train_loss:3.9554 train_time:63525ms step_avg:145.70ms step:447/3200 train_loss:3.9483 train_time:63670ms step_avg:145.70ms step:448/3200 train_loss:3.8657 train_time:63814ms step_avg:145.69ms step:449/3200 train_loss:3.9685 train_time:63957ms step_avg:145.69ms 
step:450/3200 train_loss:3.8022 train_time:64101ms step_avg:145.68ms step:451/3200 train_loss:3.8366 train_time:64244ms step_avg:145.68ms step:452/3200 train_loss:3.7014 train_time:64390ms step_avg:145.68ms step:453/3200 train_loss:3.8257 train_time:64533ms step_avg:145.67ms step:454/3200 train_loss:3.7931 train_time:64676ms step_avg:145.67ms step:455/3200 train_loss:3.7519 train_time:64820ms step_avg:145.66ms step:456/3200 train_loss:3.9655 train_time:64965ms step_avg:145.66ms step:457/3200 train_loss:3.8440 train_time:65108ms step_avg:145.66ms step:458/3200 train_loss:3.9088 train_time:65252ms step_avg:145.65ms step:459/3200 train_loss:3.9539 train_time:65395ms step_avg:145.65ms step:460/3200 train_loss:3.7570 train_time:65539ms step_avg:145.64ms step:461/3200 train_loss:3.9198 train_time:65684ms step_avg:145.64ms step:462/3200 train_loss:3.8196 train_time:65829ms step_avg:145.64ms step:463/3200 train_loss:3.8445 train_time:65973ms step_avg:145.64ms step:464/3200 train_loss:3.8917 train_time:66116ms step_avg:145.63ms step:465/3200 train_loss:3.8376 train_time:66260ms step_avg:145.63ms step:466/3200 train_loss:3.8424 train_time:66403ms step_avg:145.62ms step:467/3200 train_loss:3.9290 train_time:66547ms step_avg:145.62ms step:468/3200 train_loss:3.9411 train_time:66691ms step_avg:145.61ms step:469/3200 train_loss:3.9194 train_time:66834ms step_avg:145.61ms step:470/3200 train_loss:3.8096 train_time:66977ms step_avg:145.60ms step:471/3200 train_loss:3.8904 train_time:67122ms step_avg:145.60ms step:472/3200 train_loss:3.9430 train_time:67268ms step_avg:145.60ms step:473/3200 train_loss:3.8919 train_time:67412ms step_avg:145.60ms step:474/3200 train_loss:3.8383 train_time:67557ms step_avg:145.60ms step:475/3200 train_loss:3.7034 train_time:67701ms step_avg:145.59ms step:476/3200 train_loss:4.1358 train_time:67843ms step_avg:145.59ms step:477/3200 train_loss:3.8886 train_time:67987ms step_avg:145.58ms step:478/3200 train_loss:3.7100 train_time:68131ms 
step_avg:145.58ms step:479/3200 train_loss:3.9407 train_time:68275ms step_avg:145.58ms step:480/3200 train_loss:3.8935 train_time:68418ms step_avg:145.57ms step:481/3200 train_loss:4.0361 train_time:68563ms step_avg:145.57ms step:482/3200 train_loss:3.8449 train_time:68708ms step_avg:145.57ms step:483/3200 train_loss:3.6548 train_time:68851ms step_avg:145.56ms step:484/3200 train_loss:3.9352 train_time:68994ms step_avg:145.56ms step:485/3200 train_loss:3.7850 train_time:69137ms step_avg:145.55ms step:486/3200 train_loss:3.7909 train_time:69282ms step_avg:145.55ms step:487/3200 train_loss:3.7206 train_time:69428ms step_avg:145.55ms step:488/3200 train_loss:3.7971 train_time:69573ms step_avg:145.55ms step:489/3200 train_loss:3.9953 train_time:69716ms step_avg:145.55ms step:490/3200 train_loss:3.8340 train_time:69860ms step_avg:145.54ms step:491/3200 train_loss:3.7246 train_time:70003ms step_avg:145.54ms step:492/3200 train_loss:3.7424 train_time:70148ms step_avg:145.54ms step:493/3200 train_loss:3.8554 train_time:70291ms step_avg:145.53ms step:494/3200 train_loss:3.6972 train_time:70434ms step_avg:145.52ms step:495/3200 train_loss:3.8366 train_time:70578ms step_avg:145.52ms step:496/3200 train_loss:3.7798 train_time:70722ms step_avg:145.52ms step:497/3200 train_loss:3.6523 train_time:70867ms step_avg:145.52ms step:498/3200 train_loss:3.8562 train_time:71011ms step_avg:145.51ms step:499/3200 train_loss:3.9236 train_time:71154ms step_avg:145.51ms step:500/3200 train_loss:3.9541 train_time:71298ms step_avg:145.51ms step:500/3200 val_loss:3.8310 train_time:71349ms step_avg:145.61ms step:501/3200 train_loss:3.8674 train_time:71454ms step_avg:145.53ms step:502/3200 train_loss:3.9295 train_time:71601ms step_avg:145.53ms step:503/3200 train_loss:3.8685 train_time:71745ms step_avg:145.53ms step:504/3200 train_loss:3.9058 train_time:71889ms step_avg:145.52ms step:505/3200 train_loss:3.8517 train_time:72032ms step_avg:145.52ms step:506/3200 train_loss:3.9423 train_time:72175ms 
step_avg:145.51ms step:507/3200 train_loss:3.7680 train_time:72317ms step_avg:145.51ms step:508/3200 train_loss:3.8827 train_time:72464ms step_avg:145.51ms step:509/3200 train_loss:3.9590 train_time:72612ms step_avg:145.51ms step:510/3200 train_loss:3.8971 train_time:72755ms step_avg:145.51ms step:511/3200 train_loss:3.7095 train_time:72897ms step_avg:145.50ms step:512/3200 train_loss:3.9076 train_time:73040ms step_avg:145.50ms step:513/3200 train_loss:3.8408 train_time:73184ms step_avg:145.50ms step:514/3200 train_loss:3.8043 train_time:73327ms step_avg:145.49ms step:515/3200 train_loss:3.8773 train_time:73472ms step_avg:145.49ms step:516/3200 train_loss:3.8559 train_time:73616ms step_avg:145.49ms step:517/3200 train_loss:4.2076 train_time:73760ms step_avg:145.48ms step:518/3200 train_loss:3.8094 train_time:73904ms step_avg:145.48ms step:519/3200 train_loss:3.9103 train_time:74048ms step_avg:145.48ms step:520/3200 train_loss:3.8030 train_time:74192ms step_avg:145.47ms step:521/3200 train_loss:3.8125 train_time:74334ms step_avg:145.47ms step:522/3200 train_loss:3.7681 train_time:74478ms step_avg:145.46ms step:523/3200 train_loss:3.7772 train_time:74622ms step_avg:145.46ms step:524/3200 train_loss:4.4141 train_time:74768ms step_avg:145.46ms step:525/3200 train_loss:3.8688 train_time:74913ms step_avg:145.46ms step:526/3200 train_loss:3.8082 train_time:75055ms step_avg:145.46ms step:527/3200 train_loss:3.8217 train_time:75198ms step_avg:145.45ms step:528/3200 train_loss:3.7767 train_time:75341ms step_avg:145.45ms step:529/3200 train_loss:3.7522 train_time:75487ms step_avg:145.45ms step:530/3200 train_loss:3.9737 train_time:75630ms step_avg:145.44ms step:531/3200 train_loss:3.7715 train_time:75774ms step_avg:145.44ms step:532/3200 train_loss:4.0405 train_time:75917ms step_avg:145.44ms step:533/3200 train_loss:3.8520 train_time:76062ms step_avg:145.43ms step:534/3200 train_loss:3.7829 train_time:76205ms step_avg:145.43ms step:535/3200 train_loss:3.8060 
train_time:76349ms step_avg:145.43ms step:536/3200 train_loss:3.7425 train_time:76492ms step_avg:145.42ms step:537/3200 train_loss:3.8769 train_time:76636ms step_avg:145.42ms step:538/3200 train_loss:3.8544 train_time:76779ms step_avg:145.42ms step:539/3200 train_loss:3.7514 train_time:76923ms step_avg:145.41ms step:540/3200 train_loss:4.2480 train_time:77068ms step_avg:145.41ms step:541/3200 train_loss:3.7921 train_time:77211ms step_avg:145.41ms step:542/3200 train_loss:3.9069 train_time:77354ms step_avg:145.40ms step:543/3200 train_loss:3.7286 train_time:77496ms step_avg:145.40ms step:544/3200 train_loss:3.7069 train_time:77639ms step_avg:145.39ms step:545/3200 train_loss:3.7862 train_time:77784ms step_avg:145.39ms step:546/3200 train_loss:3.7214 train_time:77929ms step_avg:145.39ms step:547/3200 train_loss:3.7669 train_time:78072ms step_avg:145.39ms step:548/3200 train_loss:3.7704 train_time:78215ms step_avg:145.38ms step:549/3200 train_loss:3.7455 train_time:78358ms step_avg:145.38ms step:550/3200 train_loss:3.8520 train_time:78501ms step_avg:145.37ms step:551/3200 train_loss:3.7408 train_time:78645ms step_avg:145.37ms step:552/3200 train_loss:3.7515 train_time:78790ms step_avg:145.37ms step:553/3200 train_loss:4.0766 train_time:78934ms step_avg:145.37ms step:554/3200 train_loss:3.8762 train_time:79078ms step_avg:145.36ms step:555/3200 train_loss:3.8409 train_time:79222ms step_avg:145.36ms step:556/3200 train_loss:3.7717 train_time:79365ms step_avg:145.36ms step:557/3200 train_loss:3.8140 train_time:79508ms step_avg:145.35ms step:558/3200 train_loss:3.4701 train_time:79652ms step_avg:145.35ms step:559/3200 train_loss:3.7389 train_time:79795ms step_avg:145.35ms step:560/3200 train_loss:3.7814 train_time:79939ms step_avg:145.34ms step:561/3200 train_loss:3.8275 train_time:80082ms step_avg:145.34ms step:562/3200 train_loss:3.7381 train_time:80228ms step_avg:145.34ms step:563/3200 train_loss:3.6795 train_time:80372ms step_avg:145.34ms step:564/3200 
train_loss:3.8838 train_time:80515ms step_avg:145.33ms step:565/3200 train_loss:3.6942 train_time:80659ms step_avg:145.33ms step:566/3200 train_loss:3.8141 train_time:80804ms step_avg:145.33ms step:567/3200 train_loss:3.7597 train_time:81139ms step_avg:145.67ms step:568/3200 train_loss:3.7273 train_time:81287ms step_avg:145.68ms step:569/3200 train_loss:3.8097 train_time:81430ms step_avg:145.67ms step:570/3200 train_loss:3.7819 train_time:81755ms step_avg:145.99ms step:571/3200 train_loss:3.8119 train_time:81897ms step_avg:145.98ms step:572/3200 train_loss:3.8933 train_time:82039ms step_avg:145.98ms step:573/3200 train_loss:3.8511 train_time:82182ms step_avg:145.97ms step:574/3200 train_loss:3.8548 train_time:82325ms step_avg:145.97ms step:575/3200 train_loss:3.9029 train_time:82468ms step_avg:145.96ms step:576/3200 train_loss:3.8561 train_time:82612ms step_avg:145.96ms step:577/3200 train_loss:3.8844 train_time:82759ms step_avg:145.96ms step:578/3200 train_loss:3.8037 train_time:82904ms step_avg:145.96ms step:579/3200 train_loss:3.8081 train_time:83047ms step_avg:145.95ms step:580/3200 train_loss:3.7882 train_time:83192ms step_avg:145.95ms step:581/3200 train_loss:3.7304 train_time:83333ms step_avg:145.94ms step:582/3200 train_loss:3.7579 train_time:83475ms step_avg:145.94ms step:583/3200 train_loss:3.9821 train_time:83619ms step_avg:145.93ms step:584/3200 train_loss:3.7482 train_time:83765ms step_avg:145.93ms step:585/3200 train_loss:3.7131 train_time:83911ms step_avg:145.93ms step:586/3200 train_loss:3.9090 train_time:84054ms step_avg:145.93ms step:587/3200 train_loss:3.6579 train_time:84197ms step_avg:145.92ms step:588/3200 train_loss:3.7993 train_time:84340ms step_avg:145.92ms step:589/3200 train_loss:3.7807 train_time:84485ms step_avg:145.92ms step:590/3200 train_loss:4.1247 train_time:84629ms step_avg:145.91ms step:591/3200 train_loss:3.9138 train_time:84774ms step_avg:145.91ms step:592/3200 train_loss:3.6471 train_time:84917ms step_avg:145.91ms 
step:593/3200 train_loss:3.6651 train_time:85062ms step_avg:145.90ms step:594/3200 train_loss:3.6444 train_time:85204ms step_avg:145.90ms step:595/3200 train_loss:3.6947 train_time:85349ms step_avg:145.89ms step:596/3200 train_loss:4.0686 train_time:85493ms step_avg:145.89ms step:597/3200 train_loss:3.7801 train_time:85637ms step_avg:145.89ms step:598/3200 train_loss:3.7073 train_time:85783ms step_avg:145.89ms step:599/3200 train_loss:3.7924 train_time:85928ms step_avg:145.89ms step:600/3200 train_loss:3.6097 train_time:86073ms step_avg:145.89ms step:601/3200 train_loss:3.7243 train_time:86216ms step_avg:145.88ms step:602/3200 train_loss:3.7614 train_time:86360ms step_avg:145.88ms step:603/3200 train_loss:3.7887 train_time:86502ms step_avg:145.87ms step:604/3200 train_loss:3.9101 train_time:86645ms step_avg:145.87ms step:605/3200 train_loss:3.7556 train_time:86791ms step_avg:145.87ms step:606/3200 train_loss:3.7458 train_time:86935ms step_avg:145.86ms step:607/3200 train_loss:3.7017 train_time:87079ms step_avg:145.86ms step:608/3200 train_loss:3.9515 train_time:87225ms step_avg:145.86ms step:609/3200 train_loss:3.7756 train_time:87369ms step_avg:145.86ms step:610/3200 train_loss:3.7506 train_time:87512ms step_avg:145.85ms step:611/3200 train_loss:3.8435 train_time:87655ms step_avg:145.85ms step:612/3200 train_loss:3.7474 train_time:87799ms step_avg:145.85ms step:613/3200 train_loss:3.7294 train_time:87944ms step_avg:145.84ms step:614/3200 train_loss:3.8948 train_time:88089ms step_avg:145.84ms step:615/3200 train_loss:3.8481 train_time:88234ms step_avg:145.84ms step:616/3200 train_loss:3.8270 train_time:88379ms step_avg:145.84ms step:617/3200 train_loss:3.7468 train_time:88522ms step_avg:145.84ms step:618/3200 train_loss:3.7008 train_time:88666ms step_avg:145.83ms step:619/3200 train_loss:3.8101 train_time:88810ms step_avg:145.83ms step:620/3200 train_loss:3.6996 train_time:88953ms step_avg:145.83ms step:621/3200 train_loss:3.7242 train_time:89096ms 
step_avg:145.82ms step:622/3200 train_loss:4.0396 train_time:89241ms step_avg:145.82ms step:623/3200 train_loss:3.7156 train_time:89386ms step_avg:145.82ms step:624/3200 train_loss:3.7439 train_time:89530ms step_avg:145.81ms step:625/3200 train_loss:3.8277 train_time:89673ms step_avg:145.81ms step:625/3200 val_loss:3.7564 train_time:89725ms step_avg:145.89ms step:626/3200 train_loss:3.8501 train_time:89829ms step_avg:145.83ms step:627/3200 train_loss:3.8733 train_time:89977ms step_avg:145.83ms step:628/3200 train_loss:3.8630 train_time:90119ms step_avg:145.82ms step:629/3200 train_loss:3.8988 train_time:90263ms step_avg:145.82ms step:630/3200 train_loss:3.7210 train_time:90406ms step_avg:145.82ms step:631/3200 train_loss:3.8552 train_time:90549ms step_avg:145.81ms step:632/3200 train_loss:3.8774 train_time:90692ms step_avg:145.81ms step:633/3200 train_loss:3.7819 train_time:90840ms step_avg:145.81ms step:634/3200 train_loss:3.7210 train_time:90988ms step_avg:145.81ms step:635/3200 train_loss:3.8160 train_time:91140ms step_avg:145.82ms step:636/3200 train_loss:4.0747 train_time:91274ms step_avg:145.81ms step:637/3200 train_loss:3.6666 train_time:91417ms step_avg:145.80ms step:638/3200 train_loss:3.4878 train_time:91561ms step_avg:145.80ms step:639/3200 train_loss:3.7126 train_time:91703ms step_avg:145.79ms step:640/3200 train_loss:3.7484 train_time:91847ms step_avg:145.79ms step:641/3200 train_loss:3.7002 train_time:91994ms step_avg:145.79ms step:642/3200 train_loss:3.7116 train_time:92139ms step_avg:145.79ms step:643/3200 train_loss:3.7520 train_time:92284ms step_avg:145.79ms step:644/3200 train_loss:3.7522 train_time:92426ms step_avg:145.78ms step:645/3200 train_loss:3.6866 train_time:92569ms step_avg:145.78ms step:646/3200 train_loss:3.9079 train_time:92712ms step_avg:145.77ms step:647/3200 train_loss:3.8025 train_time:92857ms step_avg:145.77ms step:648/3200 train_loss:3.8014 train_time:93002ms step_avg:145.77ms step:649/3200 train_loss:3.8312 train_time:93146ms 
step_avg:145.77ms step:650/3200 train_loss:3.8920 train_time:93290ms step_avg:145.77ms step:651/3200 train_loss:3.7523 train_time:93433ms step_avg:145.76ms step:652/3200 train_loss:3.8902 train_time:93575ms step_avg:145.76ms step:653/3200 train_loss:3.7123 train_time:93720ms step_avg:145.75ms step:654/3200 train_loss:3.7899 train_time:93865ms step_avg:145.75ms step:655/3200 train_loss:3.5588 train_time:94008ms step_avg:145.75ms step:656/3200 train_loss:3.7067 train_time:94152ms step_avg:145.75ms step:657/3200 train_loss:3.7153 train_time:94297ms step_avg:145.74ms step:658/3200 train_loss:3.6424 train_time:94441ms step_avg:145.74ms step:659/3200 train_loss:3.8180 train_time:94585ms step_avg:145.74ms step:660/3200 train_loss:3.7257 train_time:94728ms step_avg:145.74ms step:661/3200 train_loss:3.8154 train_time:94870ms step_avg:145.73ms step:662/3200 train_loss:3.8866 train_time:95015ms step_avg:145.73ms step:663/3200 train_loss:3.8013 train_time:95160ms step_avg:145.73ms step:664/3200 train_loss:3.6783 train_time:95303ms step_avg:145.72ms step:665/3200 train_loss:3.7588 train_time:95447ms step_avg:145.72ms step:666/3200 train_loss:3.6298 train_time:95589ms step_avg:145.72ms step:667/3200 train_loss:3.9111 train_time:95734ms step_avg:145.71ms step:668/3200 train_loss:3.7480 train_time:95878ms step_avg:145.71ms step:669/3200 train_loss:3.7636 train_time:96022ms step_avg:145.71ms step:670/3200 train_loss:3.6131 train_time:96165ms step_avg:145.71ms step:671/3200 train_loss:3.7316 train_time:96309ms step_avg:145.70ms step:672/3200 train_loss:3.6843 train_time:96452ms step_avg:145.70ms step:673/3200 train_loss:3.7033 train_time:96596ms step_avg:145.70ms step:674/3200 train_loss:3.9843 train_time:96741ms step_avg:145.69ms step:675/3200 train_loss:3.7715 train_time:96884ms step_avg:145.69ms step:676/3200 train_loss:3.8453 train_time:97029ms step_avg:145.69ms step:677/3200 train_loss:3.6305 train_time:97173ms step_avg:145.69ms step:678/3200 train_loss:3.7236 
train_time:97317ms step_avg:145.68ms step:679/3200 train_loss:3.6825 train_time:97463ms step_avg:145.68ms step:680/3200 train_loss:3.8200 train_time:97607ms step_avg:145.68ms step:681/3200 train_loss:3.7177 train_time:97750ms step_avg:145.68ms step:682/3200 train_loss:3.7460 train_time:97895ms step_avg:145.68ms step:683/3200 train_loss:3.8227 train_time:98040ms step_avg:145.68ms step:684/3200 train_loss:3.8649 train_time:98185ms step_avg:145.68ms step:685/3200 train_loss:3.7637 train_time:98329ms step_avg:145.67ms step:686/3200 train_loss:3.8329 train_time:98473ms step_avg:145.67ms step:687/3200 train_loss:3.7661 train_time:98616ms step_avg:145.67ms step:688/3200 train_loss:3.8138 train_time:98762ms step_avg:145.67ms step:689/3200 train_loss:3.4056 train_time:98905ms step_avg:145.66ms step:690/3200 train_loss:3.5502 train_time:99050ms step_avg:145.66ms step:691/3200 train_loss:3.6851 train_time:99193ms step_avg:145.66ms step:692/3200 train_loss:3.5694 train_time:99337ms step_avg:145.66ms step:693/3200 train_loss:3.7787 train_time:99482ms step_avg:145.66ms step:694/3200 train_loss:3.7914 train_time:99626ms step_avg:145.65ms step:695/3200 train_loss:3.6801 train_time:99770ms step_avg:145.65ms step:696/3200 train_loss:3.6732 train_time:99912ms step_avg:145.64ms step:697/3200 train_loss:3.9888 train_time:100060ms step_avg:145.65ms step:698/3200 train_loss:3.7351 train_time:100204ms step_avg:145.65ms step:699/3200 train_loss:3.7768 train_time:100347ms step_avg:145.64ms step:700/3200 train_loss:3.9398 train_time:100492ms step_avg:145.64ms step:701/3200 train_loss:3.7100 train_time:100635ms step_avg:145.64ms step:702/3200 train_loss:3.6757 train_time:100778ms step_avg:145.63ms step:703/3200 train_loss:3.6589 train_time:100922ms step_avg:145.63ms step:704/3200 train_loss:3.6183 train_time:101066ms step_avg:145.63ms step:705/3200 train_loss:3.7023 train_time:101209ms step_avg:145.62ms step:706/3200 train_loss:3.6985 train_time:101354ms step_avg:145.62ms step:707/3200 
train_loss:3.7182 train_time:101496ms step_avg:145.62ms step:708/3200 train_loss:3.7756 train_time:101642ms step_avg:145.62ms step:709/3200 train_loss:3.7348 train_time:101786ms step_avg:145.62ms step:710/3200 train_loss:3.7132 train_time:101930ms step_avg:145.61ms step:711/3200 train_loss:3.6757 train_time:102073ms step_avg:145.61ms step:712/3200 train_loss:3.7219 train_time:102217ms step_avg:145.61ms step:713/3200 train_loss:3.7767 train_time:102364ms step_avg:145.61ms step:714/3200 train_loss:3.7944 train_time:102507ms step_avg:145.61ms step:715/3200 train_loss:3.7032 train_time:102651ms step_avg:145.60ms step:716/3200 train_loss:3.7076 train_time:102796ms step_avg:145.60ms step:717/3200 train_loss:3.7182 train_time:102941ms step_avg:145.60ms step:718/3200 train_loss:3.8715 train_time:103087ms step_avg:145.60ms step:719/3200 train_loss:3.7276 train_time:103230ms step_avg:145.60ms step:720/3200 train_loss:3.8089 train_time:103373ms step_avg:145.60ms step:721/3200 train_loss:3.9668 train_time:103518ms step_avg:145.60ms step:722/3200 train_loss:3.5988 train_time:103664ms step_avg:145.60ms step:723/3200 train_loss:3.8605 train_time:103807ms step_avg:145.59ms step:724/3200 train_loss:3.9141 train_time:103951ms step_avg:145.59ms step:725/3200 train_loss:3.6972 train_time:104096ms step_avg:145.59ms step:726/3200 train_loss:3.7765 train_time:104240ms step_avg:145.59ms step:727/3200 train_loss:3.6740 train_time:104383ms step_avg:145.58ms step:728/3200 train_loss:3.6942 train_time:104527ms step_avg:145.58ms step:729/3200 train_loss:3.8740 train_time:104671ms step_avg:145.58ms step:730/3200 train_loss:3.8130 train_time:104815ms step_avg:145.58ms step:731/3200 train_loss:3.8071 train_time:104958ms step_avg:145.57ms step:732/3200 train_loss:3.6987 train_time:105103ms step_avg:145.57ms step:733/3200 train_loss:3.7287 train_time:105247ms step_avg:145.57ms step:734/3200 train_loss:3.9609 train_time:105389ms step_avg:145.57ms step:735/3200 train_loss:3.6972 train_time:105534ms 
step_avg:145.56ms step:736/3200 train_loss:3.7548 train_time:105677ms step_avg:145.56ms step:737/3200 train_loss:3.8757 train_time:105822ms step_avg:145.56ms step:738/3200 train_loss:3.7951 train_time:105965ms step_avg:145.56ms step:739/3200 train_loss:3.7356 train_time:106108ms step_avg:145.55ms step:740/3200 train_loss:3.6329 train_time:106252ms step_avg:145.55ms step:741/3200 train_loss:4.2582 train_time:106396ms step_avg:145.55ms step:742/3200 train_loss:3.6335 train_time:106541ms step_avg:145.55ms step:743/3200 train_loss:3.7079 train_time:106684ms step_avg:145.54ms step:744/3200 train_loss:3.7134 train_time:106828ms step_avg:145.54ms step:745/3200 train_loss:3.7801 train_time:106971ms step_avg:145.54ms step:746/3200 train_loss:3.7400 train_time:107115ms step_avg:145.54ms step:747/3200 train_loss:3.7294 train_time:107259ms step_avg:145.53ms step:748/3200 train_loss:3.7661 train_time:107403ms step_avg:145.53ms step:749/3200 train_loss:3.6962 train_time:107546ms step_avg:145.53ms step:750/3200 train_loss:3.6964 train_time:107690ms step_avg:145.53ms step:750/3200 val_loss:3.7052 train_time:107741ms step_avg:145.60ms step:751/3200 train_loss:3.7373 train_time:107843ms step_avg:145.54ms step:752/3200 train_loss:3.6973 train_time:107990ms step_avg:145.54ms step:753/3200 train_loss:3.7388 train_time:108134ms step_avg:145.54ms step:754/3200 train_loss:3.7498 train_time:108276ms step_avg:145.53ms step:755/3200 train_loss:3.7200 train_time:108419ms step_avg:145.53ms step:756/3200 train_loss:3.7959 train_time:108723ms step_avg:145.74ms step:757/3200 train_loss:3.6195 train_time:108869ms step_avg:145.74ms step:758/3200 train_loss:3.8640 train_time:109014ms step_avg:145.74ms step:759/3200 train_loss:3.7796 train_time:109157ms step_avg:145.74ms step:760/3200 train_loss:3.7147 train_time:109480ms step_avg:145.97ms step:761/3200 train_loss:3.8237 train_time:109620ms step_avg:145.97ms step:762/3200 train_loss:3.5381 train_time:109762ms step_avg:145.96ms step:763/3200 
train_loss:3.6879 train_time:109904ms step_avg:145.95ms step:764/3200 train_loss:3.7997 train_time:110046ms step_avg:145.95ms step:765/3200 train_loss:3.4559 train_time:110189ms step_avg:145.95ms step:766/3200 train_loss:3.8754 train_time:110334ms step_avg:145.94ms step:767/3200 train_loss:3.7237 train_time:110483ms step_avg:145.95ms step:768/3200 train_loss:3.6978 train_time:110629ms step_avg:145.95ms step:769/3200 train_loss:3.7145 train_time:110772ms step_avg:145.94ms step:770/3200 train_loss:3.7302 train_time:110915ms step_avg:145.94ms step:771/3200 train_loss:3.7884 train_time:111058ms step_avg:145.94ms step:772/3200 train_loss:4.0147 train_time:111201ms step_avg:145.93ms step:773/3200 train_loss:3.5982 train_time:111346ms step_avg:145.93ms step:774/3200 train_loss:3.7841 train_time:111493ms step_avg:145.93ms step:775/3200 train_loss:3.7748 train_time:111638ms step_avg:145.93ms step:776/3200 train_loss:3.7422 train_time:111781ms step_avg:145.93ms step:777/3200 train_loss:3.5454 train_time:111924ms step_avg:145.92ms step:778/3200 train_loss:3.5439 train_time:112068ms step_avg:145.92ms step:779/3200 train_loss:3.6179 train_time:112211ms step_avg:145.92ms step:780/3200 train_loss:3.7053 train_time:112355ms step_avg:145.92ms step:781/3200 train_loss:3.7322 train_time:112499ms step_avg:145.91ms step:782/3200 train_loss:3.7997 train_time:112643ms step_avg:145.91ms step:783/3200 train_loss:3.7125 train_time:112786ms step_avg:145.91ms step:784/3200 train_loss:3.7085 train_time:112931ms step_avg:145.91ms step:785/3200 train_loss:3.7102 train_time:113074ms step_avg:145.90ms step:786/3200 train_loss:3.6873 train_time:113217ms step_avg:145.90ms step:787/3200 train_loss:3.5882 train_time:113360ms step_avg:145.89ms step:788/3200 train_loss:3.8401 train_time:113504ms step_avg:145.89ms step:789/3200 train_loss:3.6349 train_time:113650ms step_avg:145.89ms step:790/3200 train_loss:3.6902 train_time:113795ms step_avg:145.89ms step:791/3200 train_loss:3.7609 train_time:113938ms 
step_avg:145.89ms step:792/3200 train_loss:3.8980 train_time:114082ms step_avg:145.88ms step:793/3200 train_loss:3.8992 train_time:114226ms step_avg:145.88ms step:794/3200 train_loss:3.6074 train_time:114371ms step_avg:145.88ms step:795/3200 train_loss:3.7316 train_time:114514ms step_avg:145.88ms step:796/3200 train_loss:3.7906 train_time:114659ms step_avg:145.88ms step:797/3200 train_loss:3.9083 train_time:114803ms step_avg:145.87ms step:798/3200 train_loss:3.6522 train_time:114946ms step_avg:145.87ms step:799/3200 train_loss:3.7971 train_time:115090ms step_avg:145.87ms step:800/3200 train_loss:3.6874 train_time:115234ms step_avg:145.87ms step:801/3200 train_loss:3.6688 train_time:115376ms step_avg:145.86ms step:802/3200 train_loss:3.7668 train_time:115519ms step_avg:145.86ms step:803/3200 train_loss:3.6309 train_time:115663ms step_avg:145.86ms step:804/3200 train_loss:3.6533 train_time:115808ms step_avg:145.85ms step:805/3200 train_loss:3.7728 train_time:115952ms step_avg:145.85ms step:806/3200 train_loss:3.6601 train_time:116095ms step_avg:145.85ms step:807/3200 train_loss:3.6846 train_time:116239ms step_avg:145.85ms step:808/3200 train_loss:3.7767 train_time:116383ms step_avg:145.84ms step:809/3200 train_loss:3.6995 train_time:116527ms step_avg:145.84ms step:810/3200 train_loss:3.6208 train_time:116671ms step_avg:145.84ms step:811/3200 train_loss:3.7005 train_time:116814ms step_avg:145.84ms step:812/3200 train_loss:3.7334 train_time:116958ms step_avg:145.83ms step:813/3200 train_loss:3.7310 train_time:117102ms step_avg:145.83ms step:814/3200 train_loss:3.7668 train_time:117245ms step_avg:145.83ms step:815/3200 train_loss:3.7111 train_time:117391ms step_avg:145.83ms step:816/3200 train_loss:3.6950 train_time:117536ms step_avg:145.83ms step:817/3200 train_loss:3.8033 train_time:117678ms step_avg:145.82ms step:818/3200 train_loss:3.8950 train_time:117822ms step_avg:145.82ms step:819/3200 train_loss:3.6636 train_time:117967ms step_avg:145.82ms step:820/3200 
train_loss:3.8588 train_time:118111ms step_avg:145.82ms step:821/3200 train_loss:3.6400 train_time:118255ms step_avg:145.81ms step:822/3200 train_loss:3.6825 train_time:118399ms step_avg:145.81ms step:823/3200 train_loss:3.8121 train_time:118542ms step_avg:145.81ms step:824/3200 train_loss:3.7196 train_time:118686ms step_avg:145.81ms step:825/3200 train_loss:3.6562 train_time:118831ms step_avg:145.80ms step:826/3200 train_loss:3.7481 train_time:118975ms step_avg:145.80ms step:827/3200 train_loss:3.6405 train_time:119118ms step_avg:145.80ms step:828/3200 train_loss:3.8701 train_time:119263ms step_avg:145.80ms step:829/3200 train_loss:3.7570 train_time:119406ms step_avg:145.80ms step:830/3200 train_loss:3.8018 train_time:119551ms step_avg:145.79ms step:831/3200 train_loss:3.6735 train_time:119695ms step_avg:145.79ms step:832/3200 train_loss:3.7225 train_time:119838ms step_avg:145.79ms step:833/3200 train_loss:3.6509 train_time:119982ms step_avg:145.79ms step:834/3200 train_loss:3.7841 train_time:120127ms step_avg:145.79ms step:835/3200 train_loss:3.6172 train_time:120271ms step_avg:145.78ms step:836/3200 train_loss:3.5965 train_time:120414ms step_avg:145.78ms step:837/3200 train_loss:3.8521 train_time:120557ms step_avg:145.78ms step:838/3200 train_loss:3.5459 train_time:120701ms step_avg:145.77ms step:839/3200 train_loss:3.7273 train_time:120844ms step_avg:145.77ms step:840/3200 train_loss:3.5642 train_time:120989ms step_avg:145.77ms step:841/3200 train_loss:3.6096 train_time:121135ms step_avg:145.77ms step:842/3200 train_loss:3.6945 train_time:121278ms step_avg:145.77ms step:843/3200 train_loss:3.7203 train_time:121423ms step_avg:145.77ms step:844/3200 train_loss:3.7106 train_time:121566ms step_avg:145.76ms step:845/3200 train_loss:3.5689 train_time:121712ms step_avg:145.76ms step:846/3200 train_loss:3.8012 train_time:121856ms step_avg:145.76ms step:847/3200 train_loss:3.6708 train_time:122000ms step_avg:145.76ms step:848/3200 train_loss:3.6286 train_time:122144ms 
step_avg:145.76ms step:849/3200 train_loss:3.7642 train_time:122287ms step_avg:145.75ms step:850/3200 train_loss:3.6333 train_time:122433ms step_avg:145.75ms step:851/3200 train_loss:3.5878 train_time:122576ms step_avg:145.75ms step:852/3200 train_loss:3.8763 train_time:122719ms step_avg:145.75ms step:853/3200 train_loss:3.5875 train_time:122864ms step_avg:145.75ms step:854/3200 train_loss:3.7013 train_time:123009ms step_avg:145.74ms step:855/3200 train_loss:3.7870 train_time:123153ms step_avg:145.74ms step:856/3200 train_loss:3.6556 train_time:123298ms step_avg:145.74ms step:857/3200 train_loss:3.6907 train_time:123442ms step_avg:145.74ms step:858/3200 train_loss:3.7383 train_time:123586ms step_avg:145.74ms step:859/3200 train_loss:3.6207 train_time:123731ms step_avg:145.74ms step:860/3200 train_loss:3.7003 train_time:123875ms step_avg:145.73ms step:861/3200 train_loss:3.7292 train_time:124018ms step_avg:145.73ms step:862/3200 train_loss:3.7677 train_time:124162ms step_avg:145.73ms step:863/3200 train_loss:3.7320 train_time:124306ms step_avg:145.73ms step:864/3200 train_loss:3.7109 train_time:124451ms step_avg:145.73ms step:865/3200 train_loss:3.5342 train_time:124596ms step_avg:145.73ms step:866/3200 train_loss:3.7304 train_time:124739ms step_avg:145.72ms step:867/3200 train_loss:4.0000 train_time:124883ms step_avg:145.72ms step:868/3200 train_loss:3.5862 train_time:125028ms step_avg:145.72ms step:869/3200 train_loss:3.7715 train_time:125173ms step_avg:145.72ms step:870/3200 train_loss:3.7468 train_time:125317ms step_avg:145.72ms step:871/3200 train_loss:3.5886 train_time:125460ms step_avg:145.71ms step:872/3200 train_loss:3.5624 train_time:125604ms step_avg:145.71ms step:873/3200 train_loss:3.7975 train_time:125749ms step_avg:145.71ms step:874/3200 train_loss:3.5906 train_time:125892ms step_avg:145.71ms step:875/3200 train_loss:3.3199 train_time:126035ms step_avg:145.71ms step:875/3200 val_loss:3.6625 train_time:126086ms step_avg:145.76ms step:876/3200 
train_loss:3.7815 train_time:126188ms step_avg:145.71ms step:877/3200 train_loss:3.5871 train_time:126337ms step_avg:145.72ms step:878/3200 train_loss:3.7664 train_time:126480ms step_avg:145.71ms step:879/3200 train_loss:3.6130 train_time:126622ms step_avg:145.71ms step:880/3200 train_loss:3.7964 train_time:126764ms step_avg:145.71ms step:881/3200 train_loss:3.4593 train_time:126907ms step_avg:145.70ms step:882/3200 train_loss:3.6357 train_time:127051ms step_avg:145.70ms step:883/3200 train_loss:3.8243 train_time:127197ms step_avg:145.70ms step:884/3200 train_loss:3.9835 train_time:127341ms step_avg:145.70ms step:885/3200 train_loss:3.7079 train_time:127487ms step_avg:145.70ms step:886/3200 train_loss:3.6285 train_time:127631ms step_avg:145.70ms step:887/3200 train_loss:3.7145 train_time:127774ms step_avg:145.69ms step:888/3200 train_loss:4.2265 train_time:127917ms step_avg:145.69ms step:889/3200 train_loss:3.9831 train_time:128059ms step_avg:145.69ms step:890/3200 train_loss:3.6543 train_time:128203ms step_avg:145.68ms step:891/3200 train_loss:3.6678 train_time:128348ms step_avg:145.68ms step:892/3200 train_loss:3.4990 train_time:128494ms step_avg:145.69ms step:893/3200 train_loss:3.8475 train_time:128637ms step_avg:145.68ms step:894/3200 train_loss:3.5731 train_time:128780ms step_avg:145.68ms step:895/3200 train_loss:3.8149 train_time:128923ms step_avg:145.68ms step:896/3200 train_loss:3.8342 train_time:129068ms step_avg:145.67ms step:897/3200 train_loss:3.6336 train_time:129212ms step_avg:145.67ms step:898/3200 train_loss:3.6798 train_time:129356ms step_avg:145.67ms step:899/3200 train_loss:3.7283 train_time:129500ms step_avg:145.67ms step:900/3200 train_loss:3.6201 train_time:129645ms step_avg:145.67ms step:901/3200 train_loss:3.5556 train_time:129790ms step_avg:145.67ms step:902/3200 train_loss:3.7662 train_time:129933ms step_avg:145.67ms step:903/3200 train_loss:3.7668 train_time:130077ms step_avg:145.66ms step:904/3200 train_loss:3.6737 train_time:130219ms 
step_avg:145.66ms step:905/3200 train_loss:3.6445 train_time:130364ms step_avg:145.66ms step:906/3200 train_loss:3.6321 train_time:130509ms step_avg:145.66ms step:907/3200 train_loss:3.8566 train_time:130655ms step_avg:145.66ms step:908/3200 train_loss:3.6532 train_time:130798ms step_avg:145.66ms step:909/3200 train_loss:3.6914 train_time:130942ms step_avg:145.65ms step:910/3200 train_loss:3.5978 train_time:131087ms step_avg:145.65ms step:911/3200 train_loss:3.6848 train_time:131232ms step_avg:145.65ms step:912/3200 train_loss:3.7651 train_time:131375ms step_avg:145.65ms step:913/3200 train_loss:3.7419 train_time:131519ms step_avg:145.65ms step:914/3200 train_loss:3.6217 train_time:131663ms step_avg:145.64ms step:915/3200 train_loss:3.8738 train_time:131806ms step_avg:145.64ms step:916/3200 train_loss:3.6716 train_time:131951ms step_avg:145.64ms step:917/3200 train_loss:3.7665 train_time:132094ms step_avg:145.64ms step:918/3200 train_loss:3.7379 train_time:132237ms step_avg:145.64ms step:919/3200 train_loss:4.9581 train_time:132381ms step_avg:145.63ms step:920/3200 train_loss:3.6519 train_time:132525ms step_avg:145.63ms step:921/3200 train_loss:3.7114 train_time:132671ms step_avg:145.63ms step:922/3200 train_loss:3.6760 train_time:132812ms step_avg:145.63ms step:923/3200 train_loss:3.7215 train_time:132959ms step_avg:145.63ms step:924/3200 train_loss:3.7417 train_time:133102ms step_avg:145.63ms step:925/3200 train_loss:3.8280 train_time:133246ms step_avg:145.62ms step:926/3200 train_loss:3.7963 train_time:133390ms step_avg:145.62ms step:927/3200 train_loss:3.6960 train_time:133534ms step_avg:145.62ms step:928/3200 train_loss:3.6844 train_time:133678ms step_avg:145.62ms step:929/3200 train_loss:3.9152 train_time:133821ms step_avg:145.62ms step:930/3200 train_loss:3.7495 train_time:133965ms step_avg:145.61ms step:931/3200 train_loss:3.5435 train_time:134110ms step_avg:145.61ms step:932/3200 train_loss:3.6276 train_time:134254ms step_avg:145.61ms step:933/3200 
train_loss:3.8148 train_time:134398ms step_avg:145.61ms step:934/3200 train_loss:3.5254 train_time:134541ms step_avg:145.61ms step:935/3200 train_loss:3.7112 train_time:134686ms step_avg:145.61ms step:936/3200 train_loss:3.5926 train_time:134831ms step_avg:145.61ms step:937/3200 train_loss:3.6536 train_time:134974ms step_avg:145.60ms step:938/3200 train_loss:3.7450 train_time:135118ms step_avg:145.60ms step:939/3200 train_loss:3.6797 train_time:135263ms step_avg:145.60ms step:940/3200 train_loss:3.8379 train_time:135407ms step_avg:145.60ms step:941/3200 train_loss:3.6272 train_time:135551ms step_avg:145.60ms step:942/3200 train_loss:3.6881 train_time:135696ms step_avg:145.60ms step:943/3200 train_loss:3.4898 train_time:135839ms step_avg:145.59ms step:944/3200 train_loss:3.8398 train_time:135982ms step_avg:145.59ms step:945/3200 train_loss:3.5491 train_time:136289ms step_avg:145.76ms step:946/3200 train_loss:3.5699 train_time:136435ms step_avg:145.76ms step:947/3200 train_loss:5.1992 train_time:136581ms step_avg:145.76ms step:948/3200 train_loss:3.7426 train_time:136724ms step_avg:145.76ms step:949/3200 train_loss:3.6352 train_time:136867ms step_avg:145.76ms step:950/3200 train_loss:3.5318 train_time:137183ms step_avg:145.94ms step:951/3200 train_loss:3.5902 train_time:137326ms step_avg:145.94ms step:952/3200 train_loss:3.5438 train_time:137469ms step_avg:145.93ms step:953/3200 train_loss:3.6181 train_time:137611ms step_avg:145.93ms step:954/3200 train_loss:3.6931 train_time:137753ms step_avg:145.93ms step:955/3200 train_loss:3.5796 train_time:137896ms step_avg:145.92ms step:956/3200 train_loss:3.6234 train_time:138039ms step_avg:145.92ms step:957/3200 train_loss:3.5863 train_time:138188ms step_avg:145.92ms step:958/3200 train_loss:3.6409 train_time:138334ms step_avg:145.92ms step:959/3200 train_loss:3.6316 train_time:138476ms step_avg:145.92ms step:960/3200 train_loss:3.6475 train_time:138619ms step_avg:145.91ms step:961/3200 train_loss:3.5339 train_time:138762ms 
step_avg:145.91ms step:962/3200 train_loss:3.7969 train_time:138904ms step_avg:145.91ms step:963/3200 train_loss:3.7414 train_time:139048ms step_avg:145.91ms step:964/3200 train_loss:3.6870 train_time:139201ms step_avg:145.91ms step:965/3200 train_loss:3.5888 train_time:139340ms step_avg:145.91ms step:966/3200 train_loss:3.6193 train_time:139485ms step_avg:145.90ms step:967/3200 train_loss:3.8490 train_time:139630ms step_avg:145.90ms step:968/3200 train_loss:3.6709 train_time:139774ms step_avg:145.90ms step:969/3200 train_loss:3.6555 train_time:139917ms step_avg:145.90ms step:970/3200 train_loss:3.7208 train_time:140060ms step_avg:145.90ms step:971/3200 train_loss:3.5302 train_time:140204ms step_avg:145.89ms step:972/3200 train_loss:3.6848 train_time:140348ms step_avg:145.89ms step:973/3200 train_loss:3.6330 train_time:140495ms step_avg:145.89ms step:974/3200 train_loss:3.6819 train_time:140638ms step_avg:145.89ms step:975/3200 train_loss:3.7519 train_time:140781ms step_avg:145.89ms step:976/3200 train_loss:3.6271 train_time:140924ms step_avg:145.88ms step:977/3200 train_loss:3.8202 train_time:141069ms step_avg:145.88ms step:978/3200 train_loss:3.7087 train_time:141213ms step_avg:145.88ms step:979/3200 train_loss:3.5315 train_time:141357ms step_avg:145.88ms step:980/3200 train_loss:3.8251 train_time:141501ms step_avg:145.88ms step:981/3200 train_loss:3.5606 train_time:141645ms step_avg:145.88ms step:982/3200 train_loss:3.7280 train_time:141789ms step_avg:145.87ms step:983/3200 train_loss:3.7036 train_time:141933ms step_avg:145.87ms step:984/3200 train_loss:3.7067 train_time:142076ms step_avg:145.87ms step:985/3200 train_loss:3.6512 train_time:142219ms step_avg:145.87ms step:986/3200 train_loss:3.7329 train_time:142363ms step_avg:145.86ms step:987/3200 train_loss:3.5579 train_time:142509ms step_avg:145.86ms step:988/3200 train_loss:3.6293 train_time:142652ms step_avg:145.86ms step:989/3200 train_loss:3.6276 train_time:142796ms step_avg:145.86ms step:990/3200 
train_loss:3.5749 train_time:142938ms step_avg:145.86ms step:991/3200 train_loss:3.7940 train_time:143083ms step_avg:145.85ms step:992/3200 train_loss:3.6133 train_time:143228ms step_avg:145.85ms step:993/3200 train_loss:3.5818 train_time:143372ms step_avg:145.85ms step:994/3200 train_loss:3.6545 train_time:143517ms step_avg:145.85ms step:995/3200 train_loss:3.7435 train_time:143661ms step_avg:145.85ms step:996/3200 train_loss:3.6872 train_time:143804ms step_avg:145.85ms step:997/3200 train_loss:3.5976 train_time:143949ms step_avg:145.84ms step:998/3200 train_loss:3.9378 train_time:144096ms step_avg:145.85ms step:999/3200 train_loss:3.6115 train_time:144238ms step_avg:145.84ms step:1000/3200 train_loss:3.7354 train_time:144383ms step_avg:145.84ms step:1000/3200 val_loss:3.6264 train_time:144436ms step_avg:145.89ms step:1001/3200 train_loss:3.6001 train_time:144541ms step_avg:145.85ms step:1002/3200 train_loss:3.6474 train_time:144686ms step_avg:145.85ms step:1003/3200 train_loss:3.5356 train_time:144829ms step_avg:145.85ms step:1004/3200 train_loss:3.7222 train_time:144971ms step_avg:145.85ms step:1005/3200 train_loss:3.7631 train_time:145115ms step_avg:145.84ms step:1006/3200 train_loss:3.5440 train_time:145258ms step_avg:145.84ms step:1007/3200 train_loss:3.6242 train_time:145400ms step_avg:145.84ms step:1008/3200 train_loss:3.5921 train_time:145546ms step_avg:145.84ms step:1009/3200 train_loss:3.7158 train_time:145693ms step_avg:145.84ms step:1010/3200 train_loss:3.8176 train_time:145838ms step_avg:145.84ms step:1011/3200 train_loss:3.7149 train_time:145982ms step_avg:145.84ms step:1012/3200 train_loss:3.6737 train_time:146124ms step_avg:145.83ms step:1013/3200 train_loss:3.5398 train_time:146266ms step_avg:145.83ms step:1014/3200 train_loss:3.6790 train_time:146410ms step_avg:145.83ms step:1015/3200 train_loss:3.7865 train_time:146555ms step_avg:145.83ms step:1016/3200 train_loss:3.4962 train_time:146701ms step_avg:145.83ms step:1017/3200 train_loss:3.5870 
train_time:146847ms step_avg:145.83ms step:1018/3200 train_loss:3.5837 train_time:146992ms step_avg:145.83ms step:1019/3200 train_loss:3.5374 train_time:147136ms step_avg:145.82ms step:1020/3200 train_loss:3.6745 train_time:147279ms step_avg:145.82ms step:1021/3200 train_loss:3.5838 train_time:147422ms step_avg:145.82ms step:1022/3200 train_loss:3.5237 train_time:147565ms step_avg:145.82ms step:1023/3200 train_loss:3.6290 train_time:147712ms step_avg:145.82ms step:1024/3200 train_loss:3.6558 train_time:147855ms step_avg:145.81ms step:1025/3200 train_loss:3.6368 train_time:148001ms step_avg:145.81ms step:1026/3200 train_loss:3.6410 train_time:148144ms step_avg:145.81ms step:1027/3200 train_loss:3.8030 train_time:148287ms step_avg:145.81ms step:1028/3200 train_loss:3.4874 train_time:148431ms step_avg:145.81ms step:1029/3200 train_loss:3.5540 train_time:148576ms step_avg:145.81ms step:1030/3200 train_loss:3.5033 train_time:148720ms step_avg:145.80ms step:1031/3200 train_loss:3.6745 train_time:148864ms step_avg:145.80ms step:1032/3200 train_loss:3.6568 train_time:149007ms step_avg:145.80ms step:1033/3200 train_loss:3.8408 train_time:149151ms step_avg:145.80ms step:1034/3200 train_loss:3.6524 train_time:149295ms step_avg:145.80ms step:1035/3200 train_loss:3.5766 train_time:149438ms step_avg:145.79ms step:1036/3200 train_loss:3.5938 train_time:149582ms step_avg:145.79ms step:1037/3200 train_loss:3.6578 train_time:149725ms step_avg:145.79ms step:1038/3200 train_loss:3.9624 train_time:149869ms step_avg:145.79ms step:1039/3200 train_loss:3.7830 train_time:150012ms step_avg:145.78ms step:1040/3200 train_loss:3.6798 train_time:150159ms step_avg:145.78ms step:1041/3200 train_loss:3.5633 train_time:150302ms step_avg:145.78ms step:1042/3200 train_loss:3.6437 train_time:150445ms step_avg:145.78ms step:1043/3200 train_loss:3.6821 train_time:150588ms step_avg:145.78ms step:1044/3200 train_loss:3.6009 train_time:150732ms step_avg:145.78ms step:1045/3200 train_loss:3.6189 
train_time:150876ms step_avg:145.77ms step:1046/3200 train_loss:3.6983 train_time:151022ms step_avg:145.77ms step:1047/3200 train_loss:3.6007 train_time:151165ms step_avg:145.77ms step:1048/3200 train_loss:3.8014 train_time:151309ms step_avg:145.77ms step:1049/3200 train_loss:3.6621 train_time:151453ms step_avg:145.77ms step:1050/3200 train_loss:3.5795 train_time:151598ms step_avg:145.77ms step:1051/3200 train_loss:3.5480 train_time:151742ms step_avg:145.77ms step:1052/3200 train_loss:3.6730 train_time:151886ms step_avg:145.76ms step:1053/3200 train_loss:3.5492 train_time:152029ms step_avg:145.76ms step:1054/3200 train_loss:3.8708 train_time:152173ms step_avg:145.76ms step:1055/3200 train_loss:3.7018 train_time:152318ms step_avg:145.76ms step:1056/3200 train_loss:3.5633 train_time:152463ms step_avg:145.76ms step:1057/3200 train_loss:3.6652 train_time:152606ms step_avg:145.76ms step:1058/3200 train_loss:3.7407 train_time:152750ms step_avg:145.75ms step:1059/3200 train_loss:3.4632 train_time:152893ms step_avg:145.75ms step:1060/3200 train_loss:3.5840 train_time:153038ms step_avg:145.75ms step:1061/3200 train_loss:3.6122 train_time:153181ms step_avg:145.75ms step:1062/3200 train_loss:3.5767 train_time:153325ms step_avg:145.75ms step:1063/3200 train_loss:3.5544 train_time:153468ms step_avg:145.74ms step:1064/3200 train_loss:3.6476 train_time:153614ms step_avg:145.74ms step:1065/3200 train_loss:3.5502 train_time:153760ms step_avg:145.74ms step:1066/3200 train_loss:3.5421 train_time:153904ms step_avg:145.74ms step:1067/3200 train_loss:3.5655 train_time:154048ms step_avg:145.74ms step:1068/3200 train_loss:3.4719 train_time:154192ms step_avg:145.74ms step:1069/3200 train_loss:3.5905 train_time:154336ms step_avg:145.74ms step:1070/3200 train_loss:3.4636 train_time:154480ms step_avg:145.74ms step:1071/3200 train_loss:3.7229 train_time:154624ms step_avg:145.73ms step:1072/3200 train_loss:3.6679 train_time:154767ms step_avg:145.73ms step:1073/3200 train_loss:3.6159 
train_time:154912ms step_avg:145.73ms step:1074/3200 train_loss:3.6851 train_time:155057ms step_avg:145.73ms step:1075/3200 train_loss:3.6295 train_time:155203ms step_avg:145.73ms step:1076/3200 train_loss:3.5666 train_time:155347ms step_avg:145.73ms step:1077/3200 train_loss:3.9673 train_time:155491ms step_avg:145.73ms step:1078/3200 train_loss:3.6366 train_time:155635ms step_avg:145.73ms step:1079/3200 train_loss:3.3259 train_time:155779ms step_avg:145.72ms step:1080/3200 train_loss:3.7011 train_time:155923ms step_avg:145.72ms step:1081/3200 train_loss:3.6189 train_time:156067ms step_avg:145.72ms step:1082/3200 train_loss:3.6781 train_time:156211ms step_avg:145.72ms step:1083/3200 train_loss:3.7736 train_time:156357ms step_avg:145.72ms step:1084/3200 train_loss:3.6731 train_time:156502ms step_avg:145.72ms step:1085/3200 train_loss:3.6478 train_time:156645ms step_avg:145.72ms step:1086/3200 train_loss:3.6100 train_time:156789ms step_avg:145.71ms step:1087/3200 train_loss:3.8055 train_time:156934ms step_avg:145.71ms step:1088/3200 train_loss:3.6991 train_time:157079ms step_avg:145.71ms step:1089/3200 train_loss:3.5287 train_time:157223ms step_avg:145.71ms step:1090/3200 train_loss:3.5543 train_time:157367ms step_avg:145.71ms step:1091/3200 train_loss:3.6666 train_time:157510ms step_avg:145.71ms step:1092/3200 train_loss:3.4684 train_time:157657ms step_avg:145.71ms step:1093/3200 train_loss:3.6657 train_time:157801ms step_avg:145.71ms step:1094/3200 train_loss:3.7958 train_time:157945ms step_avg:145.71ms step:1095/3200 train_loss:3.6403 train_time:158088ms step_avg:145.70ms step:1096/3200 train_loss:3.5867 train_time:158232ms step_avg:145.70ms step:1097/3200 train_loss:3.6079 train_time:158377ms step_avg:145.70ms step:1098/3200 train_loss:3.6544 train_time:158521ms step_avg:145.70ms step:1099/3200 train_loss:3.7331 train_time:158665ms step_avg:145.70ms step:1100/3200 train_loss:3.6876 train_time:158808ms step_avg:145.69ms step:1101/3200 train_loss:3.6180 
train_time:158952ms step_avg:145.69ms step:1102/3200 train_loss:3.4797 train_time:159096ms step_avg:145.69ms step:1103/3200 train_loss:3.5452 train_time:159242ms step_avg:145.69ms step:1104/3200 train_loss:3.6241 train_time:159385ms step_avg:145.69ms step:1105/3200 train_loss:3.4996 train_time:159529ms step_avg:145.69ms step:1106/3200 train_loss:4.2563 train_time:159674ms step_avg:145.69ms step:1107/3200 train_loss:3.4054 train_time:159820ms step_avg:145.69ms step:1108/3200 train_loss:3.7499 train_time:159964ms step_avg:145.69ms step:1109/3200 train_loss:3.5317 train_time:160107ms step_avg:145.68ms step:1110/3200 train_loss:3.6729 train_time:160252ms step_avg:145.68ms step:1111/3200 train_loss:3.6101 train_time:160396ms step_avg:145.68ms step:1112/3200 train_loss:3.6524 train_time:160540ms step_avg:145.68ms step:1113/3200 train_loss:3.7478 train_time:160684ms step_avg:145.68ms step:1114/3200 train_loss:3.6052 train_time:160828ms step_avg:145.68ms step:1115/3200 train_loss:3.5443 train_time:160972ms step_avg:145.68ms step:1116/3200 train_loss:3.4484 train_time:161118ms step_avg:145.68ms step:1117/3200 train_loss:3.6177 train_time:161262ms step_avg:145.67ms step:1118/3200 train_loss:3.7632 train_time:161404ms step_avg:145.67ms step:1119/3200 train_loss:3.8086 train_time:161548ms step_avg:145.67ms step:1120/3200 train_loss:3.6441 train_time:161692ms step_avg:145.67ms step:1121/3200 train_loss:3.6737 train_time:161836ms step_avg:145.67ms step:1122/3200 train_loss:3.5703 train_time:161981ms step_avg:145.67ms step:1123/3200 train_loss:3.6296 train_time:162125ms step_avg:145.67ms step:1124/3200 train_loss:3.7682 train_time:162268ms step_avg:145.66ms step:1125/3200 train_loss:3.5388 train_time:162412ms step_avg:145.66ms step:1125/3200 val_loss:3.5991 train_time:162463ms step_avg:145.71ms step:1126/3200 train_loss:3.4453 train_time:162568ms step_avg:145.67ms step:1127/3200 train_loss:3.6599 train_time:162714ms step_avg:145.67ms step:1128/3200 train_loss:3.8698 
train_time:162857ms step_avg:145.67ms step:1129/3200 train_loss:3.4161 train_time:162999ms step_avg:145.67ms step:1130/3200 train_loss:3.7378 train_time:163143ms step_avg:145.66ms step:1131/3200 train_loss:3.5690 train_time:163286ms step_avg:145.66ms step:1132/3200 train_loss:3.6031 train_time:163429ms step_avg:145.66ms step:1133/3200 train_loss:3.5499 train_time:163575ms step_avg:145.66ms step:1134/3200 train_loss:3.7106 train_time:163881ms step_avg:145.80ms step:1135/3200 train_loss:3.6459 train_time:164027ms step_avg:145.80ms step:1136/3200 train_loss:3.6998 train_time:164169ms step_avg:145.80ms step:1137/3200 train_loss:3.7352 train_time:164312ms step_avg:145.80ms step:1138/3200 train_loss:3.6435 train_time:164454ms step_avg:145.79ms step:1139/3200 train_loss:3.5362 train_time:164597ms step_avg:145.79ms step:1140/3200 train_loss:3.8430 train_time:164915ms step_avg:145.94ms step:1141/3200 train_loss:3.6503 train_time:165056ms step_avg:145.94ms step:1142/3200 train_loss:3.7558 train_time:165199ms step_avg:145.94ms step:1143/3200 train_loss:3.6347 train_time:165342ms step_avg:145.93ms step:1144/3200 train_loss:3.5547 train_time:165484ms step_avg:145.93ms step:1145/3200 train_loss:3.6555 train_time:165627ms step_avg:145.93ms step:1146/3200 train_loss:3.7757 train_time:165770ms step_avg:145.92ms step:1147/3200 train_loss:3.7471 train_time:165920ms step_avg:145.93ms step:1148/3200 train_loss:3.6710 train_time:166067ms step_avg:145.93ms step:1149/3200 train_loss:3.6896 train_time:166210ms step_avg:145.93ms step:1150/3200 train_loss:3.5354 train_time:166352ms step_avg:145.92ms step:1151/3200 train_loss:3.5574 train_time:166496ms step_avg:145.92ms step:1152/3200 train_loss:3.5201 train_time:166638ms step_avg:145.92ms step:1153/3200 train_loss:3.6674 train_time:166782ms step_avg:145.92ms step:1154/3200 train_loss:3.6359 train_time:166927ms step_avg:145.92ms step:1155/3200 train_loss:3.7020 train_time:167071ms step_avg:145.91ms step:1156/3200 train_loss:3.5490 
train_time:167215ms step_avg:145.91ms step:1157/3200 train_loss:3.7225 train_time:167360ms step_avg:145.91ms step:1158/3200 train_loss:3.6772 train_time:167504ms step_avg:145.91ms step:1159/3200 train_loss:3.4858 train_time:167646ms step_avg:145.91ms step:1160/3200 train_loss:3.5306 train_time:167789ms step_avg:145.90ms step:1161/3200 train_loss:3.5205 train_time:167934ms step_avg:145.90ms step:1162/3200 train_loss:3.3320 train_time:168078ms step_avg:145.90ms step:1163/3200 train_loss:3.6335 train_time:168224ms step_avg:145.90ms step:1164/3200 train_loss:3.5976 train_time:168367ms step_avg:145.90ms step:1165/3200 train_loss:3.4728 train_time:168510ms step_avg:145.90ms step:1166/3200 train_loss:3.4551 train_time:168653ms step_avg:145.89ms step:1167/3200 train_loss:3.5710 train_time:168796ms step_avg:145.89ms step:1168/3200 train_loss:3.5817 train_time:168942ms step_avg:145.89ms step:1169/3200 train_loss:3.8972 train_time:169087ms step_avg:145.89ms step:1170/3200 train_loss:3.5826 train_time:169230ms step_avg:145.89ms step:1171/3200 train_loss:3.5959 train_time:169374ms step_avg:145.89ms step:1172/3200 train_loss:3.5051 train_time:169518ms step_avg:145.88ms step:1173/3200 train_loss:3.5969 train_time:169663ms step_avg:145.88ms step:1174/3200 train_loss:3.7362 train_time:169806ms step_avg:145.88ms step:1175/3200 train_loss:3.5703 train_time:169950ms step_avg:145.88ms step:1176/3200 train_loss:3.5917 train_time:170095ms step_avg:145.88ms step:1177/3200 train_loss:3.6423 train_time:170239ms step_avg:145.88ms step:1178/3200 train_loss:3.6293 train_time:170386ms step_avg:145.88ms step:1179/3200 train_loss:3.6857 train_time:170530ms step_avg:145.88ms step:1180/3200 train_loss:3.5939 train_time:170673ms step_avg:145.87ms step:1181/3200 train_loss:3.5902 train_time:170816ms step_avg:145.87ms step:1182/3200 train_loss:3.5449 train_time:170961ms step_avg:145.87ms step:1183/3200 train_loss:3.5929 train_time:171107ms step_avg:145.87ms step:1184/3200 train_loss:3.5254 
train_time:171250ms step_avg:145.87ms step:1185/3200 train_loss:3.6930 train_time:171394ms step_avg:145.87ms step:1186/3200 train_loss:3.7551 train_time:171538ms step_avg:145.87ms step:1187/3200 train_loss:3.5539 train_time:171682ms step_avg:145.86ms step:1188/3200 train_loss:3.6101 train_time:171825ms step_avg:145.86ms step:1189/3200 train_loss:3.6304 train_time:171969ms step_avg:145.86ms step:1190/3200 train_loss:3.4707 train_time:172113ms step_avg:145.86ms step:1191/3200 train_loss:3.6434 train_time:172258ms step_avg:145.86ms step:1192/3200 train_loss:3.7926 train_time:172404ms step_avg:145.86ms step:1193/3200 train_loss:3.5880 train_time:172548ms step_avg:145.86ms step:1194/3200 train_loss:3.4742 train_time:172692ms step_avg:145.85ms step:1195/3200 train_loss:3.7588 train_time:172835ms step_avg:145.85ms step:1196/3200 train_loss:3.5688 train_time:172978ms step_avg:145.85ms step:1197/3200 train_loss:3.5760 train_time:173124ms step_avg:145.85ms step:1198/3200 train_loss:3.4803 train_time:173268ms step_avg:145.85ms step:1199/3200 train_loss:3.4918 train_time:173412ms step_avg:145.85ms step:1200/3200 train_loss:3.5416 train_time:173557ms step_avg:145.85ms step:1201/3200 train_loss:3.6247 train_time:173701ms step_avg:145.84ms step:1202/3200 train_loss:3.6928 train_time:173846ms step_avg:145.84ms step:1203/3200 train_loss:3.7713 train_time:173990ms step_avg:145.84ms step:1204/3200 train_loss:3.6099 train_time:174133ms step_avg:145.84ms step:1205/3200 train_loss:3.5326 train_time:174277ms step_avg:145.84ms step:1206/3200 train_loss:3.6238 train_time:174422ms step_avg:145.84ms step:1207/3200 train_loss:3.6707 train_time:174567ms step_avg:145.84ms step:1208/3200 train_loss:3.7232 train_time:174710ms step_avg:145.84ms step:1209/3200 train_loss:3.5982 train_time:174854ms step_avg:145.83ms step:1210/3200 train_loss:3.4567 train_time:174998ms step_avg:145.83ms step:1211/3200 train_loss:3.5039 train_time:175143ms step_avg:145.83ms step:1212/3200 train_loss:3.5989 
train_time:175286ms step_avg:145.83ms step:1213/3200 train_loss:3.6138 train_time:175430ms step_avg:145.83ms step:1214/3200 train_loss:3.6469 train_time:175573ms step_avg:145.82ms step:1215/3200 train_loss:3.5297 train_time:175718ms step_avg:145.82ms step:1216/3200 train_loss:3.5963 train_time:175862ms step_avg:145.82ms step:1217/3200 train_loss:3.5421 train_time:176006ms step_avg:145.82ms step:1218/3200 train_loss:3.5332 train_time:176149ms step_avg:145.82ms step:1219/3200 train_loss:3.6267 train_time:176293ms step_avg:145.82ms step:1220/3200 train_loss:3.4673 train_time:176439ms step_avg:145.82ms step:1221/3200 train_loss:3.6956 train_time:176583ms step_avg:145.82ms step:1222/3200 train_loss:3.7153 train_time:176735ms step_avg:145.82ms step:1223/3200 train_loss:3.6443 train_time:176872ms step_avg:145.81ms step:1224/3200 train_loss:3.4984 train_time:177015ms step_avg:145.81ms step:1225/3200 train_loss:3.4816 train_time:177160ms step_avg:145.81ms step:1226/3200 train_loss:3.5664 train_time:177304ms step_avg:145.81ms step:1227/3200 train_loss:3.5531 train_time:177449ms step_avg:145.81ms step:1228/3200 train_loss:3.4931 train_time:177593ms step_avg:145.81ms step:1229/3200 train_loss:3.6593 train_time:177737ms step_avg:145.81ms step:1230/3200 train_loss:3.5810 train_time:177883ms step_avg:145.81ms step:1231/3200 train_loss:3.6312 train_time:178027ms step_avg:145.80ms step:1232/3200 train_loss:3.7971 train_time:178170ms step_avg:145.80ms step:1233/3200 train_loss:3.6931 train_time:178315ms step_avg:145.80ms step:1234/3200 train_loss:3.6317 train_time:178460ms step_avg:145.80ms step:1235/3200 train_loss:3.7852 train_time:178606ms step_avg:145.80ms step:1236/3200 train_loss:3.5511 train_time:178750ms step_avg:145.80ms step:1237/3200 train_loss:3.5109 train_time:178895ms step_avg:145.80ms step:1238/3200 train_loss:3.4660 train_time:179038ms step_avg:145.80ms step:1239/3200 train_loss:3.5371 train_time:179182ms step_avg:145.80ms step:1240/3200 train_loss:3.5437 
train_time:179328ms step_avg:145.79ms step:1241/3200 train_loss:3.5952 train_time:179470ms step_avg:145.79ms step:1242/3200 train_loss:3.6424 train_time:179615ms step_avg:145.79ms step:1243/3200 train_loss:3.5138 train_time:179761ms step_avg:145.79ms step:1244/3200 train_loss:3.6093 train_time:179906ms step_avg:145.79ms step:1245/3200 train_loss:3.6274 train_time:180050ms step_avg:145.79ms step:1246/3200 train_loss:3.6192 train_time:180194ms step_avg:145.79ms step:1247/3200 train_loss:3.4535 train_time:180337ms step_avg:145.79ms step:1248/3200 train_loss:3.5892 train_time:180483ms step_avg:145.79ms step:1249/3200 train_loss:3.6516 train_time:180627ms step_avg:145.78ms step:1250/3200 train_loss:3.6220 train_time:180769ms step_avg:145.78ms step:1250/3200 val_loss:3.5725 train_time:180822ms step_avg:145.82ms step:1251/3200 train_loss:3.5291 train_time:180925ms step_avg:145.79ms step:1252/3200 train_loss:3.7267 train_time:181074ms step_avg:145.79ms step:1253/3200 train_loss:3.5965 train_time:181217ms step_avg:145.79ms step:1254/3200 train_loss:3.5232 train_time:181359ms step_avg:145.79ms step:1255/3200 train_loss:3.6520 train_time:181501ms step_avg:145.78ms step:1256/3200 train_loss:3.7172 train_time:181644ms step_avg:145.78ms step:1257/3200 train_loss:3.5264 train_time:181786ms step_avg:145.78ms step:1258/3200 train_loss:3.5575 train_time:181935ms step_avg:145.78ms step:1259/3200 train_loss:3.5881 train_time:182080ms step_avg:145.78ms step:1260/3200 train_loss:3.5552 train_time:182227ms step_avg:145.78ms step:1261/3200 train_loss:3.4188 train_time:182370ms step_avg:145.78ms step:1262/3200 train_loss:3.5192 train_time:182514ms step_avg:145.78ms step:1263/3200 train_loss:3.5823 train_time:182657ms step_avg:145.78ms step:1264/3200 train_loss:3.4358 train_time:182800ms step_avg:145.77ms step:1265/3200 train_loss:3.6570 train_time:182945ms step_avg:145.77ms step:1266/3200 train_loss:3.6417 train_time:183091ms step_avg:145.77ms step:1267/3200 train_loss:3.6386 
train_time:183236ms step_avg:145.77ms step:1268/3200 train_loss:3.5832 train_time:183381ms step_avg:145.77ms step:1269/3200 train_loss:3.6213 train_time:183524ms step_avg:145.77ms step:1270/3200 train_loss:3.4722 train_time:183667ms step_avg:145.77ms step:1271/3200 train_loss:3.3225 train_time:183811ms step_avg:145.77ms step:1272/3200 train_loss:3.6083 train_time:183957ms step_avg:145.77ms step:1273/3200 train_loss:3.5680 train_time:184101ms step_avg:145.76ms step:1274/3200 train_loss:3.6218 train_time:184245ms step_avg:145.76ms step:1275/3200 train_loss:3.5687 train_time:184389ms step_avg:145.76ms step:1276/3200 train_loss:3.6632 train_time:184535ms step_avg:145.76ms step:1277/3200 train_loss:3.6843 train_time:184678ms step_avg:145.76ms step:1278/3200 train_loss:3.6344 train_time:184821ms step_avg:145.76ms step:1279/3200 train_loss:3.6377 train_time:184965ms step_avg:145.76ms step:1280/3200 train_loss:3.4647 train_time:185111ms step_avg:145.76ms step:1281/3200 train_loss:3.5838 train_time:185256ms step_avg:145.76ms step:1282/3200 train_loss:3.6452 train_time:185400ms step_avg:145.75ms step:1283/3200 train_loss:3.6850 train_time:185544ms step_avg:145.75ms step:1284/3200 train_loss:3.5696 train_time:185688ms step_avg:145.75ms step:1285/3200 train_loss:3.5966 train_time:185833ms step_avg:145.75ms step:1286/3200 train_loss:3.5805 train_time:185977ms step_avg:145.75ms step:1287/3200 train_loss:3.5534 train_time:186121ms step_avg:145.75ms step:1288/3200 train_loss:3.6909 train_time:186266ms step_avg:145.75ms step:1289/3200 train_loss:3.5244 train_time:186412ms step_avg:145.75ms step:1290/3200 train_loss:3.6097 train_time:186556ms step_avg:145.75ms step:1291/3200 train_loss:3.6780 train_time:186700ms step_avg:145.75ms step:1292/3200 train_loss:3.6047 train_time:186845ms step_avg:145.74ms step:1293/3200 train_loss:3.7082 train_time:186989ms step_avg:145.74ms step:1294/3200 train_loss:3.7218 train_time:187134ms step_avg:145.74ms step:1295/3200 train_loss:3.6930 
train_time:187278ms step_avg:145.74ms step:1296/3200 train_loss:3.5037 train_time:187421ms step_avg:145.74ms step:1297/3200 train_loss:3.5779 train_time:187566ms step_avg:145.74ms step:1298/3200 train_loss:3.4860 train_time:187711ms step_avg:145.74ms step:1299/3200 train_loss:3.5454 train_time:187856ms step_avg:145.74ms step:1300/3200 train_loss:3.6208 train_time:188000ms step_avg:145.74ms step:1301/3200 train_loss:3.6269 train_time:188144ms step_avg:145.73ms step:1302/3200 train_loss:3.6258 train_time:188288ms step_avg:145.73ms step:1303/3200 train_loss:3.7822 train_time:188433ms step_avg:145.73ms step:1304/3200 train_loss:3.5536 train_time:188577ms step_avg:145.73ms step:1305/3200 train_loss:3.7585 train_time:188720ms step_avg:145.73ms step:1306/3200 train_loss:3.4906 train_time:188863ms step_avg:145.73ms step:1307/3200 train_loss:3.6822 train_time:189007ms step_avg:145.73ms step:1308/3200 train_loss:3.6825 train_time:189153ms step_avg:145.73ms step:1309/3200 train_loss:3.5407 train_time:189297ms step_avg:145.73ms step:1310/3200 train_loss:3.5139 train_time:189441ms step_avg:145.72ms step:1311/3200 train_loss:3.5216 train_time:189585ms step_avg:145.72ms step:1312/3200 train_loss:3.5095 train_time:189730ms step_avg:145.72ms step:1313/3200 train_loss:3.6270 train_time:189874ms step_avg:145.72ms step:1314/3200 train_loss:3.5724 train_time:190019ms step_avg:145.72ms step:1315/3200 train_loss:3.2876 train_time:190162ms step_avg:145.72ms step:1316/3200 train_loss:3.5166 train_time:190306ms step_avg:145.72ms step:1317/3200 train_loss:3.6007 train_time:190450ms step_avg:145.72ms step:1318/3200 train_loss:3.6292 train_time:190595ms step_avg:145.71ms step:1319/3200 train_loss:3.5053 train_time:190738ms step_avg:145.71ms step:1320/3200 train_loss:3.6404 train_time:190881ms step_avg:145.71ms step:1321/3200 train_loss:3.6946 train_time:191025ms step_avg:145.71ms step:1322/3200 train_loss:3.5884 train_time:191168ms step_avg:145.71ms step:1323/3200 train_loss:3.5299 
train_time:191476ms step_avg:145.83ms step:1324/3200 train_loss:3.5625 train_time:191621ms step_avg:145.83ms step:1325/3200 train_loss:3.6542 train_time:191765ms step_avg:145.83ms step:1326/3200 train_loss:3.7136 train_time:191910ms step_avg:145.83ms step:1327/3200 train_loss:3.4724 train_time:192053ms step_avg:145.83ms step:1328/3200 train_loss:3.3904 train_time:192195ms step_avg:145.82ms step:1329/3200 train_loss:3.6999 train_time:192337ms step_avg:145.82ms step:1330/3200 train_loss:3.5410 train_time:192647ms step_avg:145.94ms step:1331/3200 train_loss:3.6748 train_time:192789ms step_avg:145.94ms step:1332/3200 train_loss:3.5735 train_time:192934ms step_avg:145.94ms step:1333/3200 train_loss:3.9803 train_time:193076ms step_avg:145.94ms step:1334/3200 train_loss:3.6793 train_time:193218ms step_avg:145.94ms step:1335/3200 train_loss:3.5892 train_time:193361ms step_avg:145.93ms step:1336/3200 train_loss:3.5332 train_time:193503ms step_avg:145.93ms step:1337/3200 train_loss:3.5287 train_time:193653ms step_avg:145.93ms step:1338/3200 train_loss:3.7830 train_time:193799ms step_avg:145.93ms step:1339/3200 train_loss:3.7222 train_time:193941ms step_avg:145.93ms step:1340/3200 train_loss:3.5665 train_time:194084ms step_avg:145.93ms step:1341/3200 train_loss:3.5191 train_time:194227ms step_avg:145.93ms step:1342/3200 train_loss:3.8302 train_time:194370ms step_avg:145.92ms step:1343/3200 train_loss:3.5946 train_time:194513ms step_avg:145.92ms step:1344/3200 train_loss:3.5906 train_time:194659ms step_avg:145.92ms step:1345/3200 train_loss:3.6527 train_time:194804ms step_avg:145.92ms step:1346/3200 train_loss:3.6205 train_time:194947ms step_avg:145.92ms step:1347/3200 train_loss:3.5220 train_time:195093ms step_avg:145.92ms step:1348/3200 train_loss:3.4785 train_time:195235ms step_avg:145.92ms step:1349/3200 train_loss:3.5690 train_time:195378ms step_avg:145.91ms step:1350/3200 train_loss:3.4915 train_time:195522ms step_avg:145.91ms step:1351/3200 train_loss:3.6281 
train_time:195667ms step_avg:145.91ms step:1352/3200 train_loss:3.4803 train_time:195813ms step_avg:145.91ms step:1353/3200 train_loss:3.5447 train_time:195957ms step_avg:145.91ms step:1354/3200 train_loss:3.6436 train_time:196100ms step_avg:145.91ms step:1355/3200 train_loss:3.4854 train_time:196244ms step_avg:145.91ms step:1356/3200 train_loss:3.4137 train_time:196388ms step_avg:145.90ms step:1357/3200 train_loss:3.7509 train_time:196533ms step_avg:145.90ms step:1358/3200 train_loss:3.6808 train_time:196677ms step_avg:145.90ms step:1359/3200 train_loss:3.4104 train_time:196821ms step_avg:145.90ms step:1360/3200 train_loss:3.6817 train_time:196966ms step_avg:145.90ms step:1361/3200 train_loss:3.5767 train_time:197111ms step_avg:145.90ms step:1362/3200 train_loss:3.4243 train_time:197255ms step_avg:145.90ms step:1363/3200 train_loss:3.6113 train_time:197398ms step_avg:145.90ms step:1364/3200 train_loss:3.5067 train_time:197540ms step_avg:145.89ms step:1365/3200 train_loss:3.5237 train_time:197685ms step_avg:145.89ms step:1366/3200 train_loss:3.5436 train_time:197830ms step_avg:145.89ms step:1367/3200 train_loss:3.6514 train_time:197975ms step_avg:145.89ms step:1368/3200 train_loss:3.6349 train_time:198119ms step_avg:145.89ms step:1369/3200 train_loss:3.5873 train_time:198263ms step_avg:145.89ms step:1370/3200 train_loss:3.5007 train_time:198405ms step_avg:145.89ms step:1371/3200 train_loss:3.8238 train_time:198550ms step_avg:145.89ms step:1372/3200 train_loss:3.5603 train_time:198696ms step_avg:145.89ms step:1373/3200 train_loss:3.5965 train_time:198838ms step_avg:145.88ms step:1374/3200 train_loss:3.5943 train_time:198982ms step_avg:145.88ms step:1375/3200 train_loss:3.3940 train_time:199126ms step_avg:145.88ms step:1375/3200 val_loss:3.5528 train_time:199179ms step_avg:145.92ms step:1376/3200 train_loss:3.7913 train_time:199283ms step_avg:145.89ms step:1377/3200 train_loss:3.5710 train_time:199431ms step_avg:145.89ms step:1378/3200 train_loss:3.7178 
train_time:199640ms step_avg:145.94ms step:1379/3200 train_loss:3.7604 train_time:199736ms step_avg:145.90ms step:1380/3200 train_loss:3.4415 train_time:199879ms step_avg:145.90ms step:1381/3200 train_loss:3.5595 train_time:200022ms step_avg:145.89ms step:1382/3200 train_loss:4.0253 train_time:200163ms step_avg:145.89ms step:1383/3200 train_loss:3.4707 train_time:200306ms step_avg:145.89ms step:1384/3200 train_loss:3.6273 train_time:200452ms step_avg:145.89ms step:1385/3200 train_loss:3.7063 train_time:200597ms step_avg:145.89ms step:1386/3200 train_loss:3.6167 train_time:200742ms step_avg:145.89ms step:1387/3200 train_loss:3.6146 train_time:200886ms step_avg:145.89ms step:1388/3200 train_loss:3.4371 train_time:201029ms step_avg:145.88ms step:1389/3200 train_loss:3.5835 train_time:201173ms step_avg:145.88ms step:1390/3200 train_loss:3.5484 train_time:201316ms step_avg:145.88ms step:1391/3200 train_loss:3.8171 train_time:201461ms step_avg:145.88ms step:1392/3200 train_loss:3.5287 train_time:201605ms step_avg:145.88ms step:1393/3200 train_loss:3.5197 train_time:201751ms step_avg:145.88ms step:1394/3200 train_loss:3.4899 train_time:201897ms step_avg:145.88ms step:1395/3200 train_loss:3.7691 train_time:202040ms step_avg:145.88ms step:1396/3200 train_loss:3.6553 train_time:202184ms step_avg:145.88ms step:1397/3200 train_loss:3.6680 train_time:202327ms step_avg:145.87ms step:1398/3200 train_loss:3.5402 train_time:202471ms step_avg:145.87ms step:1399/3200 train_loss:3.5088 train_time:202617ms step_avg:145.87ms step:1400/3200 train_loss:3.5649 train_time:202761ms step_avg:145.87ms step:1401/3200 train_loss:3.5488 train_time:202904ms step_avg:145.87ms step:1402/3200 train_loss:3.5721 train_time:203048ms step_avg:145.87ms step:1403/3200 train_loss:3.5334 train_time:203192ms step_avg:145.87ms step:1404/3200 train_loss:3.7656 train_time:203337ms step_avg:145.87ms step:1405/3200 train_loss:3.5144 train_time:203480ms step_avg:145.86ms step:1406/3200 train_loss:3.5524 
train_time:203624ms step_avg:145.86ms step:1407/3200 train_loss:3.5506 train_time:203768ms step_avg:145.86ms step:1408/3200 train_loss:3.4171 train_time:203913ms step_avg:145.86ms step:1409/3200 train_loss:3.5389 train_time:204057ms step_avg:145.86ms step:1410/3200 train_loss:3.5179 train_time:204201ms step_avg:145.86ms step:1411/3200 train_loss:3.5184 train_time:204344ms step_avg:145.86ms step:1412/3200 train_loss:3.6046 train_time:204489ms step_avg:145.86ms step:1413/3200 train_loss:3.5458 train_time:204635ms step_avg:145.86ms step:1414/3200 train_loss:3.5906 train_time:204780ms step_avg:145.85ms step:1415/3200 train_loss:3.5808 train_time:204924ms step_avg:145.85ms step:1416/3200 train_loss:3.6628 train_time:205067ms step_avg:145.85ms step:1417/3200 train_loss:3.4630 train_time:205213ms step_avg:145.85ms step:1418/3200 train_loss:3.5218 train_time:205359ms step_avg:145.85ms step:1419/3200 train_loss:3.6189 train_time:205502ms step_avg:145.85ms step:1420/3200 train_loss:3.6470 train_time:205645ms step_avg:145.85ms step:1421/3200 train_loss:3.6261 train_time:205790ms step_avg:145.85ms step:1422/3200 train_loss:3.6072 train_time:205935ms step_avg:145.85ms step:1423/3200 train_loss:3.5926 train_time:206078ms step_avg:145.84ms step:1424/3200 train_loss:3.5761 train_time:206223ms step_avg:145.84ms step:1425/3200 train_loss:3.5731 train_time:206366ms step_avg:145.84ms step:1426/3200 train_loss:3.4513 train_time:206511ms step_avg:145.84ms step:1427/3200 train_loss:3.5691 train_time:206657ms step_avg:145.84ms step:1428/3200 train_loss:3.5092 train_time:206801ms step_avg:145.84ms step:1429/3200 train_loss:3.6205 train_time:206944ms step_avg:145.84ms step:1430/3200 train_loss:3.5801 train_time:207088ms step_avg:145.84ms step:1431/3200 train_loss:3.5117 train_time:207233ms step_avg:145.84ms step:1432/3200 train_loss:3.5612 train_time:207377ms step_avg:145.83ms step:1433/3200 train_loss:3.5933 train_time:207520ms step_avg:145.83ms step:1434/3200 train_loss:3.4466 
train_time:207664ms step_avg:145.83ms step:1435/3200 train_loss:3.5638 train_time:207809ms step_avg:145.83ms step:1436/3200 train_loss:3.3905 train_time:207954ms step_avg:145.83ms step:1437/3200 train_loss:3.4561 train_time:208097ms step_avg:145.83ms step:1438/3200 train_loss:3.6524 train_time:208240ms step_avg:145.83ms step:1439/3200 train_loss:3.6125 train_time:208383ms step_avg:145.82ms step:1440/3200 train_loss:3.5618 train_time:208526ms step_avg:145.82ms step:1441/3200 train_loss:3.4175 train_time:208671ms step_avg:145.82ms step:1442/3200 train_loss:3.5843 train_time:208816ms step_avg:145.82ms step:1443/3200 train_loss:3.6456 train_time:208960ms step_avg:145.82ms step:1444/3200 train_loss:3.7273 train_time:209103ms step_avg:145.82ms step:1445/3200 train_loss:3.6869 train_time:209247ms step_avg:145.82ms step:1446/3200 train_loss:3.5784 train_time:209393ms step_avg:145.82ms step:1447/3200 train_loss:3.4485 train_time:209538ms step_avg:145.82ms step:1448/3200 train_loss:3.5229 train_time:209682ms step_avg:145.82ms step:1449/3200 train_loss:3.5426 train_time:209827ms step_avg:145.81ms step:1450/3200 train_loss:3.6635 train_time:209972ms step_avg:145.81ms step:1451/3200 train_loss:3.6435 train_time:210116ms step_avg:145.81ms step:1452/3200 train_loss:3.4637 train_time:210260ms step_avg:145.81ms step:1453/3200 train_loss:3.5844 train_time:210403ms step_avg:145.81ms step:1454/3200 train_loss:3.4942 train_time:210547ms step_avg:145.81ms step:1455/3200 train_loss:3.5216 train_time:210691ms step_avg:145.81ms step:1456/3200 train_loss:3.5713 train_time:210836ms step_avg:145.81ms step:1457/3200 train_loss:3.5068 train_time:210980ms step_avg:145.80ms step:1458/3200 train_loss:3.4045 train_time:211125ms step_avg:145.80ms step:1459/3200 train_loss:3.6504 train_time:211268ms step_avg:145.80ms step:1460/3200 train_loss:3.5169 train_time:211414ms step_avg:145.80ms step:1461/3200 train_loss:3.5706 train_time:211557ms step_avg:145.80ms step:1462/3200 train_loss:3.6869 
train_time:211700ms step_avg:145.80ms step:1463/3200 train_loss:3.5117 train_time:211844ms step_avg:145.80ms step:1464/3200 train_loss:3.7078 train_time:211990ms step_avg:145.80ms step:1465/3200 train_loss:3.5927 train_time:212136ms step_avg:145.80ms step:1466/3200 train_loss:3.5988 train_time:212280ms step_avg:145.80ms step:1467/3200 train_loss:3.5155 train_time:212426ms step_avg:145.80ms step:1468/3200 train_loss:3.6721 train_time:212570ms step_avg:145.80ms step:1469/3200 train_loss:3.5363 train_time:212714ms step_avg:145.79ms step:1470/3200 train_loss:3.5156 train_time:212858ms step_avg:145.79ms step:1471/3200 train_loss:3.5634 train_time:213002ms step_avg:145.79ms step:1472/3200 train_loss:3.4856 train_time:213146ms step_avg:145.79ms step:1473/3200 train_loss:3.5774 train_time:213290ms step_avg:145.79ms step:1474/3200 train_loss:3.6744 train_time:213435ms step_avg:145.79ms step:1475/3200 train_loss:3.5440 train_time:213578ms step_avg:145.79ms step:1476/3200 train_loss:3.3862 train_time:213723ms step_avg:145.79ms step:1477/3200 train_loss:3.5024 train_time:213867ms step_avg:145.79ms step:1478/3200 train_loss:3.4746 train_time:214010ms step_avg:145.78ms step:1479/3200 train_loss:3.5581 train_time:214156ms step_avg:145.78ms step:1480/3200 train_loss:3.6407 train_time:214300ms step_avg:145.78ms step:1481/3200 train_loss:3.5075 train_time:214444ms step_avg:145.78ms step:1482/3200 train_loss:3.6933 train_time:214587ms step_avg:145.78ms step:1483/3200 train_loss:3.6120 train_time:214731ms step_avg:145.78ms step:1484/3200 train_loss:3.5131 train_time:214876ms step_avg:145.78ms step:1485/3200 train_loss:3.5111 train_time:215020ms step_avg:145.78ms step:1486/3200 train_loss:3.5036 train_time:215164ms step_avg:145.78ms step:1487/3200 train_loss:3.4804 train_time:215310ms step_avg:145.77ms step:1488/3200 train_loss:3.5696 train_time:215455ms step_avg:145.77ms step:1489/3200 train_loss:3.4755 train_time:215598ms step_avg:145.77ms step:1490/3200 train_loss:3.5690 
train_time:215742ms step_avg:145.77ms step:1491/3200 train_loss:3.5071 train_time:215885ms step_avg:145.77ms step:1492/3200 train_loss:3.4262 train_time:216030ms step_avg:145.77ms step:1493/3200 train_loss:3.5017 train_time:216174ms step_avg:145.77ms step:1494/3200 train_loss:3.6720 train_time:216318ms step_avg:145.77ms step:1495/3200 train_loss:3.5291 train_time:216463ms step_avg:145.77ms step:1496/3200 train_loss:3.2798 train_time:216608ms step_avg:145.77ms step:1497/3200 train_loss:3.5932 train_time:216753ms step_avg:145.77ms step:1498/3200 train_loss:3.5533 train_time:216896ms step_avg:145.76ms step:1499/3200 train_loss:3.5973 train_time:217040ms step_avg:145.76ms step:1500/3200 train_loss:3.5529 train_time:217184ms step_avg:145.76ms step:1500/3200 val_loss:3.5394 train_time:217235ms step_avg:145.80ms step:1501/3200 train_loss:3.5351 train_time:217332ms step_avg:145.76ms step:1502/3200 train_loss:3.3292 train_time:217482ms step_avg:145.77ms step:1503/3200 train_loss:3.6065 train_time:217625ms step_avg:145.76ms step:1504/3200 train_loss:3.4794 train_time:217768ms step_avg:145.76ms step:1505/3200 train_loss:3.4851 train_time:217910ms step_avg:145.76ms step:1506/3200 train_loss:3.4503 train_time:218053ms step_avg:145.76ms step:1507/3200 train_loss:3.5304 train_time:218198ms step_avg:145.76ms step:1508/3200 train_loss:3.4522 train_time:218347ms step_avg:145.76ms step:1509/3200 train_loss:3.7618 train_time:218492ms step_avg:145.76ms step:1510/3200 train_loss:3.5086 train_time:218635ms step_avg:145.76ms step:1511/3200 train_loss:3.5081 train_time:218779ms step_avg:145.76ms step:1512/3200 train_loss:3.6400 train_time:219074ms step_avg:145.85ms step:1513/3200 train_loss:3.6616 train_time:219218ms step_avg:145.85ms step:1514/3200 train_loss:3.5257 train_time:219362ms step_avg:145.85ms step:1515/3200 train_loss:3.3561 train_time:219504ms step_avg:145.85ms step:1516/3200 train_loss:3.4885 train_time:219646ms step_avg:145.85ms step:1517/3200 train_loss:3.4857 
train_time:219789ms step_avg:145.85ms step:1518/3200 train_loss:3.5660 train_time:219933ms step_avg:145.84ms step:1519/3200 train_loss:3.4478 train_time:220082ms step_avg:145.85ms step:1520/3200 train_loss:3.7530 train_time:220429ms step_avg:145.98ms step:1521/3200 train_loss:3.4096 train_time:220583ms step_avg:145.98ms step:1522/3200 train_loss:3.4707 train_time:220726ms step_avg:145.98ms step:1523/3200 train_loss:3.6119 train_time:220869ms step_avg:145.98ms step:1524/3200 train_loss:3.4712 train_time:221011ms step_avg:145.98ms step:1525/3200 train_loss:3.5749 train_time:221153ms step_avg:145.98ms step:1526/3200 train_loss:3.5625 train_time:221296ms step_avg:145.97ms step:1527/3200 train_loss:3.5287 train_time:221443ms step_avg:145.97ms step:1528/3200 train_loss:3.5253 train_time:221590ms step_avg:145.97ms step:1529/3200 train_loss:3.6755 train_time:221733ms step_avg:145.97ms step:1530/3200 train_loss:3.6471 train_time:221877ms step_avg:145.97ms step:1531/3200 train_loss:3.4826 train_time:222022ms step_avg:145.97ms step:1532/3200 train_loss:3.4368 train_time:222164ms step_avg:145.97ms step:1533/3200 train_loss:3.5930 train_time:222307ms step_avg:145.97ms step:1534/3200 train_loss:3.5479 train_time:222452ms step_avg:145.97ms step:1535/3200 train_loss:3.5325 train_time:222598ms step_avg:145.97ms step:1536/3200 train_loss:3.5319 train_time:222744ms step_avg:145.97ms step:1537/3200 train_loss:3.4699 train_time:222888ms step_avg:145.96ms step:1538/3200 train_loss:3.5199 train_time:223031ms step_avg:145.96ms step:1539/3200 train_loss:3.6996 train_time:223174ms step_avg:145.96ms step:1540/3200 train_loss:3.6320 train_time:223319ms step_avg:145.96ms step:1541/3200 train_loss:3.5406 train_time:223463ms step_avg:145.96ms step:1542/3200 train_loss:3.4919 train_time:223607ms step_avg:145.96ms step:1543/3200 train_loss:3.4959 train_time:223752ms step_avg:145.96ms step:1544/3200 train_loss:3.4563 train_time:223895ms step_avg:145.96ms step:1545/3200 train_loss:3.5460 
train_time:224040ms step_avg:145.95ms step:1546/3200 train_loss:3.5133 train_time:224184ms step_avg:145.95ms step:1547/3200 train_loss:3.4896 train_time:224328ms step_avg:145.95ms step:1548/3200 train_loss:3.4555 train_time:224471ms step_avg:145.95ms step:1549/3200 train_loss:3.4904 train_time:224616ms step_avg:145.95ms step:1550/3200 train_loss:3.6005 train_time:224762ms step_avg:145.95ms step:1551/3200 train_loss:3.5244 train_time:224906ms step_avg:145.95ms step:1552/3200 train_loss:3.4672 train_time:225049ms step_avg:145.95ms step:1553/3200 train_loss:3.4654 train_time:225193ms step_avg:145.94ms step:1554/3200 train_loss:3.4604 train_time:225336ms step_avg:145.94ms step:1555/3200 train_loss:3.5840 train_time:225482ms step_avg:145.94ms step:1556/3200 train_loss:3.5896 train_time:225626ms step_avg:145.94ms step:1557/3200 train_loss:3.5222 train_time:225770ms step_avg:145.94ms step:1558/3200 train_loss:3.5763 train_time:225915ms step_avg:145.94ms step:1559/3200 train_loss:3.4976 train_time:226061ms step_avg:145.94ms step:1560/3200 train_loss:3.4130 train_time:226206ms step_avg:145.94ms step:1561/3200 train_loss:3.6601 train_time:226349ms step_avg:145.94ms step:1562/3200 train_loss:3.4737 train_time:226492ms step_avg:145.94ms step:1563/3200 train_loss:3.4586 train_time:226636ms step_avg:145.93ms step:1564/3200 train_loss:3.5833 train_time:226782ms step_avg:145.93ms step:1565/3200 train_loss:3.4085 train_time:226926ms step_avg:145.93ms step:1566/3200 train_loss:3.4558 train_time:227071ms step_avg:145.93ms step:1567/3200 train_loss:3.6138 train_time:227214ms step_avg:145.93ms step:1568/3200 train_loss:3.4940 train_time:227358ms step_avg:145.93ms step:1569/3200 train_loss:3.4764 train_time:227503ms step_avg:145.93ms step:1570/3200 train_loss:3.5724 train_time:227646ms step_avg:145.93ms step:1571/3200 train_loss:3.5837 train_time:227789ms step_avg:145.93ms step:1572/3200 train_loss:3.4106 train_time:227934ms step_avg:145.92ms step:1573/3200 train_loss:3.4426 
train_time:228078ms step_avg:145.92ms step:1574/3200 train_loss:3.5594 train_time:228223ms step_avg:145.92ms step:1575/3200 train_loss:3.4290 train_time:228367ms step_avg:145.92ms step:1576/3200 train_loss:3.5736 train_time:228511ms step_avg:145.92ms step:1577/3200 train_loss:3.4789 train_time:228654ms step_avg:145.92ms step:1578/3200 train_loss:3.5323 train_time:228799ms step_avg:145.92ms step:1579/3200 train_loss:3.5086 train_time:228944ms step_avg:145.92ms step:1580/3200 train_loss:3.4757 train_time:229087ms step_avg:145.92ms step:1581/3200 train_loss:3.4524 train_time:229230ms step_avg:145.91ms step:1582/3200 train_loss:3.6928 train_time:229376ms step_avg:145.91ms step:1583/3200 train_loss:3.4652 train_time:229522ms step_avg:145.91ms step:1584/3200 train_loss:3.6246 train_time:229666ms step_avg:145.91ms step:1585/3200 train_loss:3.4499 train_time:229810ms step_avg:145.91ms step:1586/3200 train_loss:3.6104 train_time:229955ms step_avg:145.91ms step:1587/3200 train_loss:3.3993 train_time:230099ms step_avg:145.91ms step:1588/3200 train_loss:3.5930 train_time:230243ms step_avg:145.91ms step:1589/3200 train_loss:3.5037 train_time:230386ms step_avg:145.91ms step:1590/3200 train_loss:3.6525 train_time:230529ms step_avg:145.90ms step:1591/3200 train_loss:3.4730 train_time:230674ms step_avg:145.90ms step:1592/3200 train_loss:3.4924 train_time:230820ms step_avg:145.90ms step:1593/3200 train_loss:3.5590 train_time:230965ms step_avg:145.90ms step:1594/3200 train_loss:3.5371 train_time:231109ms step_avg:145.90ms step:1595/3200 train_loss:3.5144 train_time:231252ms step_avg:145.90ms step:1596/3200 train_loss:3.6542 train_time:231396ms step_avg:145.90ms step:1597/3200 train_loss:3.3839 train_time:231540ms step_avg:145.90ms step:1598/3200 train_loss:3.5469 train_time:231684ms step_avg:145.90ms step:1599/3200 train_loss:3.5852 train_time:231828ms step_avg:145.90ms step:1600/3200 train_loss:3.6330 train_time:231971ms step_avg:145.89ms step:1601/3200 train_loss:3.4851 
train_time:232115ms step_avg:145.89ms step:1602/3200 train_loss:3.7722 train_time:232260ms step_avg:145.89ms step:1603/3200 train_loss:3.6655 train_time:232403ms step_avg:145.89ms step:1604/3200 train_loss:3.4453 train_time:232546ms step_avg:145.89ms step:1605/3200 train_loss:3.4852 train_time:232690ms step_avg:145.89ms step:1606/3200 train_loss:3.3674 train_time:232834ms step_avg:145.89ms step:1607/3200 train_loss:3.6998 train_time:232978ms step_avg:145.89ms step:1608/3200 train_loss:3.4940 train_time:233124ms step_avg:145.88ms step:1609/3200 train_loss:3.5166 train_time:233268ms step_avg:145.88ms step:1610/3200 train_loss:3.4699 train_time:233411ms step_avg:145.88ms step:1611/3200 train_loss:4.0709 train_time:233554ms step_avg:145.88ms step:1612/3200 train_loss:3.7002 train_time:233698ms step_avg:145.88ms step:1613/3200 train_loss:3.6106 train_time:233842ms step_avg:145.88ms step:1614/3200 train_loss:3.4815 train_time:233986ms step_avg:145.88ms step:1615/3200 train_loss:3.5226 train_time:234129ms step_avg:145.87ms step:1616/3200 train_loss:3.5178 train_time:234276ms step_avg:145.88ms step:1617/3200 train_loss:3.4825 train_time:234421ms step_avg:145.88ms step:1618/3200 train_loss:3.5569 train_time:234565ms step_avg:145.87ms step:1619/3200 train_loss:3.5046 train_time:234709ms step_avg:145.87ms step:1620/3200 train_loss:3.3997 train_time:234853ms step_avg:145.87ms step:1621/3200 train_loss:3.6715 train_time:234997ms step_avg:145.87ms step:1622/3200 train_loss:3.5838 train_time:235141ms step_avg:145.87ms step:1623/3200 train_loss:3.3685 train_time:235285ms step_avg:145.87ms step:1624/3200 train_loss:3.4896 train_time:235429ms step_avg:145.87ms step:1625/3200 train_loss:3.4400 train_time:235573ms step_avg:145.87ms step:1625/3200 val_loss:3.5188 train_time:235626ms step_avg:145.90ms step:1626/3200 train_loss:3.5243 train_time:235730ms step_avg:145.87ms step:1627/3200 train_loss:3.4842 train_time:235876ms step_avg:145.87ms step:1628/3200 train_loss:3.4554 
train_time:236019ms step_avg:145.87ms step:1629/3200 train_loss:3.5598 train_time:236162ms step_avg:145.87ms step:1630/3200 train_loss:3.4568 train_time:236305ms step_avg:145.87ms step:1631/3200 train_loss:3.5139 train_time:236449ms step_avg:145.87ms step:1632/3200 train_loss:3.3961 train_time:236592ms step_avg:145.86ms step:1633/3200 train_loss:3.3583 train_time:236736ms step_avg:145.86ms step:1634/3200 train_loss:3.5213 train_time:236884ms step_avg:145.86ms step:1635/3200 train_loss:3.5130 train_time:237029ms step_avg:145.86ms step:1636/3200 train_loss:3.4544 train_time:237172ms step_avg:145.86ms step:1637/3200 train_loss:3.5380 train_time:237315ms step_avg:145.86ms step:1638/3200 train_loss:3.5872 train_time:237458ms step_avg:145.86ms step:1639/3200 train_loss:3.6218 train_time:237601ms step_avg:145.86ms step:1640/3200 train_loss:3.7805 train_time:237747ms step_avg:145.86ms step:1641/3200 train_loss:3.5962 train_time:237893ms step_avg:145.86ms step:1642/3200 train_loss:3.5177 train_time:238037ms step_avg:145.86ms step:1643/3200 train_loss:3.5994 train_time:238181ms step_avg:145.85ms step:1644/3200 train_loss:3.5038 train_time:238323ms step_avg:145.85ms step:1645/3200 train_loss:3.5246 train_time:238468ms step_avg:145.85ms step:1646/3200 train_loss:3.5149 train_time:238612ms step_avg:145.85ms step:1647/3200 train_loss:3.2961 train_time:238755ms step_avg:145.85ms step:1648/3200 train_loss:3.5581 train_time:238900ms step_avg:145.85ms step:1649/3200 train_loss:3.4192 train_time:239044ms step_avg:145.85ms step:1650/3200 train_loss:3.4995 train_time:239190ms step_avg:145.85ms step:1651/3200 train_loss:3.4700 train_time:239333ms step_avg:145.85ms step:1652/3200 train_loss:3.5442 train_time:239476ms step_avg:145.84ms step:1653/3200 train_loss:3.4730 train_time:239619ms step_avg:145.84ms step:1654/3200 train_loss:3.5970 train_time:239764ms step_avg:145.84ms step:1655/3200 train_loss:3.5895 train_time:239908ms step_avg:145.84ms step:1656/3200 train_loss:3.4092 
train_time:240056ms step_avg:145.84ms step:1657/3200 train_loss:3.5757 train_time:240198ms step_avg:145.84ms step:1658/3200 train_loss:3.4611 train_time:240343ms step_avg:145.84ms step:1659/3200 train_loss:3.4447 train_time:240488ms step_avg:145.84ms step:1660/3200 train_loss:3.5239 train_time:240632ms step_avg:145.84ms step:1661/3200 train_loss:3.5483 train_time:240776ms step_avg:145.84ms step:1662/3200 train_loss:3.4657 train_time:240920ms step_avg:145.84ms step:1663/3200 train_loss:3.5631 train_time:241063ms step_avg:145.83ms step:1664/3200 train_loss:3.5673 train_time:241209ms step_avg:145.83ms step:1665/3200 train_loss:3.5993 train_time:241354ms step_avg:145.83ms step:1666/3200 train_loss:3.5702 train_time:241497ms step_avg:145.83ms step:1667/3200 train_loss:3.7092 train_time:241641ms step_avg:145.83ms step:1668/3200 train_loss:3.4182 train_time:241785ms step_avg:145.83ms step:1669/3200 train_loss:3.5031 train_time:241931ms step_avg:145.83ms step:1670/3200 train_loss:3.4279 train_time:242075ms step_avg:145.83ms step:1671/3200 train_loss:3.4322 train_time:242218ms step_avg:145.83ms step:1672/3200 train_loss:3.5877 train_time:242362ms step_avg:145.83ms step:1673/3200 train_loss:3.7693 train_time:242506ms step_avg:145.82ms step:1674/3200 train_loss:3.4850 train_time:242650ms step_avg:145.82ms step:1675/3200 train_loss:3.4706 train_time:242792ms step_avg:145.82ms step:1676/3200 train_loss:3.3616 train_time:242936ms step_avg:145.82ms step:1677/3200 train_loss:3.5668 train_time:243079ms step_avg:145.82ms step:1678/3200 train_loss:3.4812 train_time:243224ms step_avg:145.82ms step:1679/3200 train_loss:3.5084 train_time:243369ms step_avg:145.82ms step:1680/3200 train_loss:3.4971 train_time:243512ms step_avg:145.82ms step:1681/3200 train_loss:3.3174 train_time:243655ms step_avg:145.81ms step:1682/3200 train_loss:3.5025 train_time:243798ms step_avg:145.81ms step:1683/3200 train_loss:3.5132 train_time:243941ms step_avg:145.81ms step:1684/3200 train_loss:3.5571 
train_time:244086ms step_avg:145.81ms step:1685/3200 train_loss:3.5471 train_time:244231ms step_avg:145.81ms step:1686/3200 train_loss:3.4636 train_time:244375ms step_avg:145.81ms step:1687/3200 train_loss:3.5690 train_time:244519ms step_avg:145.81ms step:1688/3200 train_loss:3.4525 train_time:244661ms step_avg:145.81ms step:1689/3200 train_loss:3.5303 train_time:244806ms step_avg:145.80ms step:1690/3200 train_loss:3.4471 train_time:244952ms step_avg:145.80ms step:1691/3200 train_loss:3.3505 train_time:245095ms step_avg:145.80ms step:1692/3200 train_loss:3.5033 train_time:245238ms step_avg:145.80ms step:1693/3200 train_loss:3.4960 train_time:245382ms step_avg:145.80ms step:1694/3200 train_loss:3.4165 train_time:245526ms step_avg:145.80ms step:1695/3200 train_loss:3.8545 train_time:245668ms step_avg:145.80ms step:1696/3200 train_loss:3.5691 train_time:245814ms step_avg:145.80ms step:1697/3200 train_loss:3.5529 train_time:245958ms step_avg:145.80ms step:1698/3200 train_loss:3.4563 train_time:246102ms step_avg:145.79ms step:1699/3200 train_loss:3.3714 train_time:246247ms step_avg:145.79ms step:1700/3200 train_loss:3.4614 train_time:246392ms step_avg:145.79ms step:1701/3200 train_loss:3.4575 train_time:246694ms step_avg:145.89ms step:1702/3200 train_loss:3.5305 train_time:246838ms step_avg:145.89ms step:1703/3200 train_loss:3.4487 train_time:246980ms step_avg:145.88ms step:1704/3200 train_loss:3.6511 train_time:247124ms step_avg:145.88ms step:1705/3200 train_loss:3.4192 train_time:247266ms step_avg:145.88ms step:1706/3200 train_loss:3.6441 train_time:247410ms step_avg:145.88ms step:1707/3200 train_loss:3.4885 train_time:247553ms step_avg:145.88ms step:1708/3200 train_loss:3.2823 train_time:247699ms step_avg:145.88ms step:1709/3200 train_loss:3.6006 train_time:247845ms step_avg:145.88ms step:1710/3200 train_loss:3.5142 train_time:248154ms step_avg:145.97ms step:1711/3200 train_loss:3.5056 train_time:248296ms step_avg:145.97ms step:1712/3200 train_loss:3.4985 
train_time:248440ms step_avg:145.97ms step:1713/3200 train_loss:3.5350 train_time:248583ms step_avg:145.97ms step:1714/3200 train_loss:3.5579 train_time:248727ms step_avg:145.97ms step:1715/3200 train_loss:3.4793 train_time:248871ms step_avg:145.97ms step:1716/3200 train_loss:3.4886 train_time:249015ms step_avg:145.96ms step:1717/3200 train_loss:3.3190 train_time:249163ms step_avg:145.97ms step:1718/3200 train_loss:3.4598 train_time:249308ms step_avg:145.96ms step:1719/3200 train_loss:3.4795 train_time:249452ms step_avg:145.96ms step:1720/3200 train_loss:3.4293 train_time:249595ms step_avg:145.96ms step:1721/3200 train_loss:3.5837 train_time:249738ms step_avg:145.96ms step:1722/3200 train_loss:3.3886 train_time:249882ms step_avg:145.96ms step:1723/3200 train_loss:3.5315 train_time:250027ms step_avg:145.96ms step:1724/3200 train_loss:3.6149 train_time:250174ms step_avg:145.96ms step:1725/3200 train_loss:3.4691 train_time:250317ms step_avg:145.96ms step:1726/3200 train_loss:3.6922 train_time:250462ms step_avg:145.96ms step:1727/3200 train_loss:3.4804 train_time:250604ms step_avg:145.95ms step:1728/3200 train_loss:3.5419 train_time:250748ms step_avg:145.95ms step:1729/3200 train_loss:3.5166 train_time:250892ms step_avg:145.95ms step:1730/3200 train_loss:3.5198 train_time:251036ms step_avg:145.95ms step:1731/3200 train_loss:3.8833 train_time:251181ms step_avg:145.95ms step:1732/3200 train_loss:3.5063 train_time:251327ms step_avg:145.95ms step:1733/3200 train_loss:3.6287 train_time:251471ms step_avg:145.95ms step:1734/3200 train_loss:3.4161 train_time:251614ms step_avg:145.95ms step:1735/3200 train_loss:3.4594 train_time:251758ms step_avg:145.95ms step:1736/3200 train_loss:3.4832 train_time:251901ms step_avg:145.94ms step:1737/3200 train_loss:3.4590 train_time:252045ms step_avg:145.94ms step:1738/3200 train_loss:3.5996 train_time:252191ms step_avg:145.94ms step:1739/3200 train_loss:3.4667 train_time:252335ms step_avg:145.94ms step:1740/3200 train_loss:3.5232 
train_time:252479ms step_avg:145.94ms step:1741/3200 train_loss:3.5839 train_time:252623ms step_avg:145.94ms step:1742/3200 train_loss:3.3818 train_time:252768ms step_avg:145.94ms step:1743/3200 train_loss:3.2745 train_time:252911ms step_avg:145.94ms step:1744/3200 train_loss:3.2128 train_time:253054ms step_avg:145.94ms step:1745/3200 train_loss:3.5003 train_time:253197ms step_avg:145.94ms step:1746/3200 train_loss:3.5132 train_time:253343ms step_avg:145.93ms step:1747/3200 train_loss:3.4856 train_time:253488ms step_avg:145.93ms step:1748/3200 train_loss:3.4973 train_time:253634ms step_avg:145.93ms step:1749/3200 train_loss:3.7352 train_time:253779ms step_avg:145.93ms step:1750/3200 train_loss:3.4431 train_time:253924ms step_avg:145.93ms step:1750/3200 val_loss:3.4982 train_time:253977ms step_avg:145.96ms step:1751/3200 train_loss:3.5174 train_time:254080ms step_avg:145.94ms step:1752/3200 train_loss:3.5092 train_time:254226ms step_avg:145.94ms step:1753/3200 train_loss:3.1433 train_time:254371ms step_avg:145.94ms step:1754/3200 train_loss:3.2641 train_time:254515ms step_avg:145.94ms step:1755/3200 train_loss:3.3584 train_time:254658ms step_avg:145.94ms step:1756/3200 train_loss:3.3101 train_time:254800ms step_avg:145.93ms step:1757/3200 train_loss:3.4711 train_time:254942ms step_avg:145.93ms step:1758/3200 train_loss:3.3548 train_time:255088ms step_avg:145.93ms step:1759/3200 train_loss:3.3547 train_time:255235ms step_avg:145.93ms step:1760/3200 train_loss:4.4079 train_time:255380ms step_avg:145.93ms step:1761/3200 train_loss:3.4821 train_time:255523ms step_avg:145.93ms step:1762/3200 train_loss:3.5240 train_time:255666ms step_avg:145.93ms step:1763/3200 train_loss:3.5190 train_time:255810ms step_avg:145.93ms step:1764/3200 train_loss:3.5353 train_time:255956ms step_avg:145.93ms step:1765/3200 train_loss:3.4485 train_time:256101ms step_avg:145.93ms step:1766/3200 train_loss:3.4921 train_time:256247ms step_avg:145.93ms step:1767/3200 train_loss:3.5048 
train_time:256392ms step_avg:145.93ms step:1768/3200 train_loss:3.7564 train_time:256537ms step_avg:145.93ms step:1769/3200 train_loss:3.4856 train_time:256680ms step_avg:145.92ms step:1770/3200 train_loss:3.5479 train_time:256822ms step_avg:145.92ms step:1771/3200 train_loss:3.8698 train_time:256965ms step_avg:145.92ms step:1772/3200 train_loss:3.4818 train_time:257110ms step_avg:145.92ms step:1773/3200 train_loss:3.3899 train_time:257256ms step_avg:145.92ms step:1774/3200 train_loss:3.6393 train_time:257400ms step_avg:145.92ms step:1775/3200 train_loss:3.3863 train_time:257543ms step_avg:145.92ms step:1776/3200 train_loss:3.5500 train_time:257688ms step_avg:145.92ms step:1777/3200 train_loss:3.5990 train_time:257831ms step_avg:145.91ms step:1778/3200 train_loss:3.6930 train_time:257975ms step_avg:145.91ms step:1779/3200 train_loss:3.4890 train_time:258120ms step_avg:145.91ms step:1780/3200 train_loss:3.7854 train_time:258264ms step_avg:145.91ms step:1781/3200 train_loss:3.5644 train_time:258409ms step_avg:145.91ms step:1782/3200 train_loss:3.5782 train_time:258554ms step_avg:145.91ms step:1783/3200 train_loss:3.3641 train_time:258699ms step_avg:145.91ms step:1784/3200 train_loss:3.4483 train_time:258843ms step_avg:145.91ms step:1785/3200 train_loss:3.5939 train_time:258987ms step_avg:145.91ms step:1786/3200 train_loss:3.4768 train_time:259131ms step_avg:145.91ms step:1787/3200 train_loss:3.6491 train_time:259276ms step_avg:145.91ms step:1788/3200 train_loss:3.4591 train_time:259420ms step_avg:145.91ms step:1789/3200 train_loss:3.4378 train_time:259564ms step_avg:145.90ms step:1790/3200 train_loss:3.5768 train_time:259708ms step_avg:145.90ms step:1791/3200 train_loss:3.4817 train_time:259853ms step_avg:145.90ms step:1792/3200 train_loss:3.4371 train_time:259997ms step_avg:145.90ms step:1793/3200 train_loss:3.5647 train_time:260142ms step_avg:145.90ms step:1794/3200 train_loss:3.4400 train_time:260286ms step_avg:145.90ms step:1795/3200 train_loss:3.4300 
train_time:260430ms step_avg:145.90ms step:1796/3200 train_loss:3.4948 train_time:260576ms step_avg:145.90ms step:1797/3200 train_loss:3.4489 train_time:260720ms step_avg:145.90ms step:1798/3200 train_loss:3.5912 train_time:260864ms step_avg:145.90ms step:1799/3200 train_loss:3.4726 train_time:261007ms step_avg:145.90ms step:1800/3200 train_loss:3.5491 train_time:261152ms step_avg:145.90ms step:1801/3200 train_loss:3.4839 train_time:261296ms step_avg:145.89ms step:1802/3200 train_loss:3.5145 train_time:261439ms step_avg:145.89ms step:1803/3200 train_loss:3.4290 train_time:261583ms step_avg:145.89ms step:1804/3200 train_loss:3.3585 train_time:261727ms step_avg:145.89ms step:1805/3200 train_loss:3.6078 train_time:261871ms step_avg:145.89ms step:1806/3200 train_loss:3.5295 train_time:262016ms step_avg:145.89ms step:1807/3200 train_loss:3.5427 train_time:262159ms step_avg:145.89ms step:1808/3200 train_loss:3.6488 train_time:262303ms step_avg:145.89ms step:1809/3200 train_loss:3.4441 train_time:262447ms step_avg:145.88ms step:1810/3200 train_loss:3.5451 train_time:262591ms step_avg:145.88ms step:1811/3200 train_loss:3.6829 train_time:262735ms step_avg:145.88ms step:1812/3200 train_loss:3.5342 train_time:262880ms step_avg:145.88ms step:1813/3200 train_loss:3.5748 train_time:263023ms step_avg:145.88ms step:1814/3200 train_loss:3.6012 train_time:263166ms step_avg:145.88ms step:1815/3200 train_loss:3.5454 train_time:263312ms step_avg:145.88ms step:1816/3200 train_loss:3.5761 train_time:263457ms step_avg:145.88ms step:1817/3200 train_loss:3.5299 train_time:263601ms step_avg:145.88ms step:1818/3200 train_loss:3.5914 train_time:263745ms step_avg:145.88ms step:1819/3200 train_loss:3.5153 train_time:263889ms step_avg:145.88ms step:1820/3200 train_loss:3.5114 train_time:264032ms step_avg:145.87ms step:1821/3200 train_loss:3.4564 train_time:264177ms step_avg:145.87ms step:1822/3200 train_loss:3.4304 train_time:264321ms step_avg:145.87ms step:1823/3200 train_loss:3.3580 
train_time:264465ms step_avg:145.87ms step:1824/3200 train_loss:3.5167 train_time:264608ms step_avg:145.87ms step:1825/3200 train_loss:3.6270 train_time:264755ms step_avg:145.87ms step:1826/3200 train_loss:3.5963 train_time:264900ms step_avg:145.87ms step:1827/3200 train_loss:3.5721 train_time:265044ms step_avg:145.87ms step:1828/3200 train_loss:3.4356 train_time:265187ms step_avg:145.87ms step:1829/3200 train_loss:3.4563 train_time:265332ms step_avg:145.87ms step:1830/3200 train_loss:3.6031 train_time:265477ms step_avg:145.87ms step:1831/3200 train_loss:3.3725 train_time:265621ms step_avg:145.87ms step:1832/3200 train_loss:3.5248 train_time:265764ms step_avg:145.86ms step:1833/3200 train_loss:3.4076 train_time:265907ms step_avg:145.86ms step:1834/3200 train_loss:3.7244 train_time:266053ms step_avg:145.86ms step:1835/3200 train_loss:3.5602 train_time:266199ms step_avg:145.86ms step:1836/3200 train_loss:3.5377 train_time:266343ms step_avg:145.86ms step:1837/3200 train_loss:3.6654 train_time:266486ms step_avg:145.86ms step:1838/3200 train_loss:3.5285 train_time:266630ms step_avg:145.86ms step:1839/3200 train_loss:3.4098 train_time:266776ms step_avg:145.86ms step:1840/3200 train_loss:3.5237 train_time:266920ms step_avg:145.86ms step:1841/3200 train_loss:3.4091 train_time:267064ms step_avg:145.86ms step:1842/3200 train_loss:3.5205 train_time:267209ms step_avg:145.86ms step:1843/3200 train_loss:3.5708 train_time:267354ms step_avg:145.86ms step:1844/3200 train_loss:3.3227 train_time:267499ms step_avg:145.86ms step:1845/3200 train_loss:3.4462 train_time:267643ms step_avg:145.85ms step:1846/3200 train_loss:3.5116 train_time:267787ms step_avg:145.85ms step:1847/3200 train_loss:3.4510 train_time:267930ms step_avg:145.85ms step:1848/3200 train_loss:3.3523 train_time:268075ms step_avg:145.85ms step:1849/3200 train_loss:3.6218 train_time:268219ms step_avg:145.85ms step:1850/3200 train_loss:3.3885 train_time:268364ms step_avg:145.85ms step:1851/3200 train_loss:3.4695 
train_time:268508ms step_avg:145.85ms step:1852/3200 train_loss:3.4268 train_time:268652ms step_avg:145.85ms step:1853/3200 train_loss:3.6251 train_time:268796ms step_avg:145.85ms step:1854/3200 train_loss:3.6046 train_time:268941ms step_avg:145.85ms step:1855/3200 train_loss:3.4772 train_time:269085ms step_avg:145.85ms step:1856/3200 train_loss:3.4324 train_time:269230ms step_avg:145.84ms step:1857/3200 train_loss:3.4613 train_time:269376ms step_avg:145.85ms step:1858/3200 train_loss:3.7088 train_time:269521ms step_avg:145.84ms step:1859/3200 train_loss:3.5457 train_time:269664ms step_avg:145.84ms step:1860/3200 train_loss:3.4862 train_time:269810ms step_avg:145.84ms step:1861/3200 train_loss:3.5238 train_time:269955ms step_avg:145.84ms step:1862/3200 train_loss:3.4230 train_time:270098ms step_avg:145.84ms step:1863/3200 train_loss:3.4193 train_time:270243ms step_avg:145.84ms step:1864/3200 train_loss:3.4883 train_time:270387ms step_avg:145.84ms step:1865/3200 train_loss:3.5301 train_time:270530ms step_avg:145.84ms step:1866/3200 train_loss:3.2834 train_time:270675ms step_avg:145.84ms step:1867/3200 train_loss:3.4217 train_time:270820ms step_avg:145.84ms step:1868/3200 train_loss:3.3757 train_time:270965ms step_avg:145.84ms step:1869/3200 train_loss:3.3748 train_time:271109ms step_avg:145.84ms step:1870/3200 train_loss:3.5357 train_time:271253ms step_avg:145.84ms step:1871/3200 train_loss:3.5159 train_time:271398ms step_avg:145.83ms step:1872/3200 train_loss:3.4688 train_time:271542ms step_avg:145.83ms step:1873/3200 train_loss:3.4740 train_time:271685ms step_avg:145.83ms step:1874/3200 train_loss:3.4119 train_time:271830ms step_avg:145.83ms step:1875/3200 train_loss:3.5111 train_time:271982ms step_avg:145.83ms step:1875/3200 val_loss:3.4853 train_time:272029ms step_avg:145.86ms step:1876/3200 train_loss:3.5069 train_time:272132ms step_avg:145.84ms step:1877/3200 train_loss:3.4375 train_time:272282ms step_avg:145.84ms step:1878/3200 train_loss:3.4855 
train_time:272425ms step_avg:145.84ms step:1879/3200 train_loss:3.5925 train_time:272567ms step_avg:145.84ms step:1880/3200 train_loss:3.4641 train_time:272710ms step_avg:145.83ms step:1881/3200 train_loss:3.5243 train_time:272852ms step_avg:145.83ms step:1882/3200 train_loss:3.4406 train_time:272995ms step_avg:145.83ms step:1883/3200 train_loss:3.5081 train_time:273141ms step_avg:145.83ms step:1884/3200 train_loss:3.5054 train_time:273288ms step_avg:145.83ms step:1885/3200 train_loss:3.2571 train_time:273433ms step_avg:145.83ms step:1886/3200 train_loss:3.6621 train_time:273577ms step_avg:145.83ms step:1887/3200 train_loss:3.3938 train_time:273721ms step_avg:145.83ms step:1888/3200 train_loss:3.4053 train_time:273864ms step_avg:145.83ms step:1889/3200 train_loss:3.4885 train_time:274008ms step_avg:145.83ms step:1890/3200 train_loss:3.5301 train_time:274314ms step_avg:145.91ms step:1891/3200 train_loss:3.3505 train_time:274461ms step_avg:145.91ms step:1892/3200 train_loss:3.6269 train_time:274604ms step_avg:145.91ms step:1893/3200 train_loss:3.3793 train_time:274746ms step_avg:145.91ms step:1894/3200 train_loss:3.5131 train_time:274889ms step_avg:145.91ms step:1895/3200 train_loss:3.5493 train_time:275031ms step_avg:145.91ms step:1896/3200 train_loss:3.3532 train_time:275175ms step_avg:145.90ms step:1897/3200 train_loss:3.5159 train_time:275324ms step_avg:145.91ms step:1898/3200 train_loss:3.4748 train_time:275471ms step_avg:145.91ms step:1899/3200 train_loss:3.5548 train_time:275615ms step_avg:145.91ms step:1900/3200 train_loss:3.3343 train_time:275941ms step_avg:146.00ms step:1901/3200 train_loss:3.5799 train_time:276084ms step_avg:146.00ms step:1902/3200 train_loss:3.4636 train_time:276225ms step_avg:146.00ms step:1903/3200 train_loss:3.6261 train_time:276368ms step_avg:145.99ms step:1904/3200 train_loss:3.4311 train_time:276510ms step_avg:145.99ms step:1905/3200 train_loss:3.7028 train_time:276652ms step_avg:145.99ms step:1906/3200 train_loss:3.4428 
train_time:276797ms step_avg:145.99ms step:1907/3200 train_loss:3.4358 train_time:276945ms step_avg:145.99ms step:1908/3200 train_loss:3.5081 train_time:277089ms step_avg:145.99ms step:1909/3200 train_loss:3.3855 train_time:277233ms step_avg:145.99ms step:1910/3200 train_loss:3.4615 train_time:277377ms step_avg:145.99ms step:1911/3200 train_loss:3.5521 train_time:277521ms step_avg:145.99ms step:1912/3200 train_loss:3.4756 train_time:277664ms step_avg:145.99ms step:1913/3200 train_loss:3.3640 train_time:277807ms step_avg:145.98ms step:1914/3200 train_loss:3.2289 train_time:277952ms step_avg:145.98ms step:1915/3200 train_loss:3.4279 train_time:278097ms step_avg:145.98ms step:1916/3200 train_loss:3.6429 train_time:278242ms step_avg:145.98ms step:1917/3200 train_loss:3.6376 train_time:278385ms step_avg:145.98ms step:1918/3200 train_loss:3.5890 train_time:278528ms step_avg:145.98ms step:1919/3200 train_loss:3.4185 train_time:278671ms step_avg:145.98ms step:1920/3200 train_loss:3.6669 train_time:278815ms step_avg:145.98ms step:1921/3200 train_loss:3.4893 train_time:278960ms step_avg:145.98ms step:1922/3200 train_loss:3.4247 train_time:279107ms step_avg:145.98ms step:1923/3200 train_loss:3.5950 train_time:279251ms step_avg:145.98ms step:1924/3200 train_loss:3.5597 train_time:279394ms step_avg:145.97ms step:1925/3200 train_loss:3.3964 train_time:279537ms step_avg:145.97ms step:1926/3200 train_loss:3.4372 train_time:279682ms step_avg:145.97ms step:1927/3200 train_loss:3.3421 train_time:279825ms step_avg:145.97ms step:1928/3200 train_loss:3.4536 train_time:279969ms step_avg:145.97ms step:1929/3200 train_loss:3.3104 train_time:280115ms step_avg:145.97ms step:1930/3200 train_loss:3.4295 train_time:280259ms step_avg:145.97ms step:1931/3200 train_loss:3.5612 train_time:280403ms step_avg:145.97ms step:1932/3200 train_loss:3.4302 train_time:280545ms step_avg:145.97ms step:1933/3200 train_loss:3.5683 train_time:280688ms step_avg:145.96ms step:1934/3200 train_loss:3.4403 
train_time:280831ms step_avg:145.96ms step:1935/3200 train_loss:3.4873 train_time:280976ms step_avg:145.96ms step:1936/3200 train_loss:3.5229 train_time:281123ms step_avg:145.96ms step:1937/3200 train_loss:3.4794 train_time:281267ms step_avg:145.96ms step:1938/3200 train_loss:3.5072 train_time:281410ms step_avg:145.96ms step:1939/3200 train_loss:3.4328 train_time:281554ms step_avg:145.96ms step:1940/3200 train_loss:3.5321 train_time:281699ms step_avg:145.96ms step:1941/3200 train_loss:3.5628 train_time:281843ms step_avg:145.96ms step:1942/3200 train_loss:3.4045 train_time:281987ms step_avg:145.96ms step:1943/3200 train_loss:3.4368 train_time:282130ms step_avg:145.95ms step:1944/3200 train_loss:3.5009 train_time:282275ms step_avg:145.95ms step:1945/3200 train_loss:3.3527 train_time:282420ms step_avg:145.95ms step:1946/3200 train_loss:3.6188 train_time:282564ms step_avg:145.95ms step:1947/3200 train_loss:3.4926 train_time:282709ms step_avg:145.95ms step:1948/3200 train_loss:3.4652 train_time:282853ms step_avg:145.95ms step:1949/3200 train_loss:3.4656 train_time:282996ms step_avg:145.95ms step:1950/3200 train_loss:3.3554 train_time:283142ms step_avg:145.95ms step:1951/3200 train_loss:3.4711 train_time:283287ms step_avg:145.95ms step:1952/3200 train_loss:3.3182 train_time:283431ms step_avg:145.95ms step:1953/3200 train_loss:3.5248 train_time:283575ms step_avg:145.95ms step:1954/3200 train_loss:3.5228 train_time:283719ms step_avg:145.95ms step:1955/3200 train_loss:3.4722 train_time:283863ms step_avg:145.95ms step:1956/3200 train_loss:3.3624 train_time:284006ms step_avg:145.94ms step:1957/3200 train_loss:3.4551 train_time:284149ms step_avg:145.94ms step:1958/3200 train_loss:3.6340 train_time:284292ms step_avg:145.94ms step:1959/3200 train_loss:3.5585 train_time:284436ms step_avg:145.94ms step:1960/3200 train_loss:3.5745 train_time:284582ms step_avg:145.94ms step:1961/3200 train_loss:3.3799 train_time:284725ms step_avg:145.94ms step:1962/3200 train_loss:3.5022 
train_time:284869ms step_avg:145.94ms step:1963/3200 train_loss:3.5449 train_time:285014ms step_avg:145.94ms step:1964/3200 train_loss:3.4937 train_time:285158ms step_avg:145.94ms step:1965/3200 train_loss:3.4040 train_time:285304ms step_avg:145.94ms step:1966/3200 train_loss:3.8077 train_time:285447ms step_avg:145.93ms step:1967/3200 train_loss:3.4198 train_time:285590ms step_avg:145.93ms step:1968/3200 train_loss:3.4583 train_time:285735ms step_avg:145.93ms step:1969/3200 train_loss:3.5099 train_time:285881ms step_avg:145.93ms step:1970/3200 train_loss:3.4661 train_time:286024ms step_avg:145.93ms step:1971/3200 train_loss:3.3570 train_time:286169ms step_avg:145.93ms step:1972/3200 train_loss:3.3347 train_time:286313ms step_avg:145.93ms step:1973/3200 train_loss:3.4558 train_time:286458ms step_avg:145.93ms step:1974/3200 train_loss:3.4287 train_time:286603ms step_avg:145.93ms step:1975/3200 train_loss:3.4105 train_time:286747ms step_avg:145.93ms step:1976/3200 train_loss:3.5628 train_time:286890ms step_avg:145.93ms step:1977/3200 train_loss:3.4331 train_time:287035ms step_avg:145.93ms step:1978/3200 train_loss:3.7916 train_time:287180ms step_avg:145.92ms step:1979/3200 train_loss:3.4775 train_time:287324ms step_avg:145.92ms step:1980/3200 train_loss:3.4786 train_time:287468ms step_avg:145.92ms step:1981/3200 train_loss:3.4861 train_time:287612ms step_avg:145.92ms step:1982/3200 train_loss:3.5141 train_time:287755ms step_avg:145.92ms step:1983/3200 train_loss:3.4389 train_time:287900ms step_avg:145.92ms step:1984/3200 train_loss:3.3985 train_time:288043ms step_avg:145.92ms step:1985/3200 train_loss:3.4647 train_time:288189ms step_avg:145.92ms step:1986/3200 train_loss:3.5229 train_time:288333ms step_avg:145.92ms step:1987/3200 train_loss:3.4989 train_time:288478ms step_avg:145.92ms step:1988/3200 train_loss:3.4680 train_time:288624ms step_avg:145.92ms step:1989/3200 train_loss:3.5510 train_time:288767ms step_avg:145.92ms step:1990/3200 train_loss:3.5888 
train_time:288911ms step_avg:145.91ms step:1991/3200 train_loss:3.3674 train_time:289055ms step_avg:145.91ms step:1992/3200 train_loss:3.3577 train_time:289200ms step_avg:145.91ms step:1993/3200 train_loss:3.5423 train_time:289344ms step_avg:145.91ms step:1994/3200 train_loss:3.3661 train_time:289487ms step_avg:145.91ms step:1995/3200 train_loss:3.4516 train_time:289630ms step_avg:145.91ms step:1996/3200 train_loss:3.5313 train_time:289775ms step_avg:145.91ms step:1997/3200 train_loss:3.3928 train_time:289921ms step_avg:145.91ms step:1998/3200 train_loss:3.4953 train_time:290066ms step_avg:145.91ms step:1999/3200 train_loss:3.4946 train_time:290209ms step_avg:145.91ms step:2000/3200 train_loss:3.4164 train_time:290353ms step_avg:145.91ms step:2000/3200 val_loss:3.4717 train_time:290405ms step_avg:145.93ms step:2001/3200 train_loss:3.5602 train_time:290503ms step_avg:145.91ms step:2002/3200 train_loss:3.5010 train_time:290654ms step_avg:145.91ms step:2003/3200 train_loss:3.5943 train_time:290796ms step_avg:145.91ms step:2004/3200 train_loss:3.5110 train_time:290938ms step_avg:145.91ms step:2005/3200 train_loss:3.5196 train_time:291081ms step_avg:145.91ms step:2006/3200 train_loss:3.4106 train_time:291223ms step_avg:145.90ms step:2007/3200 train_loss:3.4361 train_time:291368ms step_avg:145.90ms step:2008/3200 train_loss:3.4908 train_time:291517ms step_avg:145.90ms step:2009/3200 train_loss:3.5213 train_time:291662ms step_avg:145.90ms step:2010/3200 train_loss:3.4230 train_time:291806ms step_avg:145.90ms step:2011/3200 train_loss:3.5072 train_time:291949ms step_avg:145.90ms step:2012/3200 train_loss:3.4804 train_time:292093ms step_avg:145.90ms step:2013/3200 train_loss:3.4874 train_time:292236ms step_avg:145.90ms step:2014/3200 train_loss:3.3939 train_time:292380ms step_avg:145.90ms step:2015/3200 train_loss:3.4490 train_time:292525ms step_avg:145.90ms step:2016/3200 train_loss:3.4636 train_time:292670ms step_avg:145.90ms step:2017/3200 train_loss:3.6002 
train_time:292815ms step_avg:145.90ms step:2018/3200 train_loss:3.4493 train_time:292958ms step_avg:145.90ms step:2019/3200 train_loss:3.5908 train_time:293102ms step_avg:145.89ms step:2020/3200 train_loss:3.6096 train_time:293245ms step_avg:145.89ms step:2021/3200 train_loss:3.3124 train_time:293390ms step_avg:145.89ms step:2022/3200 train_loss:3.5418 train_time:293535ms step_avg:145.89ms step:2023/3200 train_loss:3.4730 train_time:293679ms step_avg:145.89ms step:2024/3200 train_loss:3.5723 train_time:293823ms step_avg:145.89ms step:2025/3200 train_loss:3.6098 train_time:293967ms step_avg:145.89ms step:2026/3200 train_loss:3.4008 train_time:294113ms step_avg:145.89ms step:2027/3200 train_loss:3.4212 train_time:294256ms step_avg:145.89ms step:2028/3200 train_loss:3.3432 train_time:294399ms step_avg:145.89ms step:2029/3200 train_loss:3.4480 train_time:294543ms step_avg:145.89ms step:2030/3200 train_loss:3.3737 train_time:294688ms step_avg:145.89ms step:2031/3200 train_loss:3.4639 train_time:294832ms step_avg:145.88ms step:2032/3200 train_loss:3.4595 train_time:294976ms step_avg:145.88ms step:2033/3200 train_loss:3.4763 train_time:295119ms step_avg:145.88ms step:2034/3200 train_loss:3.3630 train_time:295261ms step_avg:145.88ms step:2035/3200 train_loss:3.5289 train_time:295405ms step_avg:145.88ms step:2036/3200 train_loss:3.5329 train_time:295551ms step_avg:145.88ms step:2037/3200 train_loss:3.5184 train_time:295697ms step_avg:145.88ms step:2038/3200 train_loss:3.3888 train_time:295840ms step_avg:145.88ms step:2039/3200 train_loss:3.6523 train_time:295984ms step_avg:145.88ms step:2040/3200 train_loss:3.4847 train_time:296127ms step_avg:145.88ms step:2041/3200 train_loss:3.5017 train_time:296271ms step_avg:145.87ms step:2042/3200 train_loss:3.4504 train_time:296415ms step_avg:145.87ms step:2043/3200 train_loss:3.3498 train_time:296557ms step_avg:145.87ms step:2044/3200 train_loss:3.4713 train_time:296703ms step_avg:145.87ms step:2045/3200 train_loss:3.4682 
train_time:296847ms step_avg:145.87ms step:2046/3200 train_loss:3.3369 train_time:296993ms step_avg:145.87ms step:2047/3200 train_loss:3.4169 train_time:297137ms step_avg:145.87ms step:2048/3200 train_loss:3.4889 train_time:297281ms step_avg:145.87ms step:2049/3200 train_loss:3.4369 train_time:297425ms step_avg:145.87ms step:2050/3200 train_loss:3.4867 train_time:297567ms step_avg:145.87ms step:2051/3200 train_loss:3.6345 train_time:297714ms step_avg:145.87ms step:2052/3200 train_loss:3.4923 train_time:297857ms step_avg:145.87ms step:2053/3200 train_loss:3.4506 train_time:298001ms step_avg:145.86ms step:2054/3200 train_loss:3.4271 train_time:298143ms step_avg:145.86ms step:2055/3200 train_loss:3.2973 train_time:298288ms step_avg:145.86ms step:2056/3200 train_loss:3.4110 train_time:298434ms step_avg:145.86ms step:2057/3200 train_loss:3.5863 train_time:298577ms step_avg:145.86ms step:2058/3200 train_loss:3.6088 train_time:298721ms step_avg:145.86ms step:2059/3200 train_loss:3.4679 train_time:298865ms step_avg:145.86ms step:2060/3200 train_loss:3.5081 train_time:299012ms step_avg:145.86ms step:2061/3200 train_loss:3.4985 train_time:299155ms step_avg:145.86ms step:2062/3200 train_loss:3.4435 train_time:299299ms step_avg:145.86ms step:2063/3200 train_loss:3.3624 train_time:299443ms step_avg:145.86ms step:2064/3200 train_loss:3.6664 train_time:299587ms step_avg:145.86ms step:2065/3200 train_loss:3.5213 train_time:299733ms step_avg:145.86ms step:2066/3200 train_loss:3.4782 train_time:299877ms step_avg:145.85ms step:2067/3200 train_loss:3.5237 train_time:300020ms step_avg:145.85ms step:2068/3200 train_loss:3.4268 train_time:300163ms step_avg:145.85ms step:2069/3200 train_loss:3.4810 train_time:300308ms step_avg:145.85ms step:2070/3200 train_loss:3.6133 train_time:300453ms step_avg:145.85ms step:2071/3200 train_loss:3.6153 train_time:300597ms step_avg:145.85ms step:2072/3200 train_loss:3.4643 train_time:300739ms step_avg:145.85ms step:2073/3200 train_loss:3.5015 
train_time:300883ms step_avg:145.85ms step:2074/3200 train_loss:3.3861 train_time:301027ms step_avg:145.85ms step:2075/3200 train_loss:3.9199 train_time:301172ms step_avg:145.85ms step:2076/3200 train_loss:3.3457 train_time:301316ms step_avg:145.85ms step:2077/3200 train_loss:3.5082 train_time:301460ms step_avg:145.84ms step:2078/3200 train_loss:3.3999 train_time:301603ms step_avg:145.84ms step:2079/3200 train_loss:3.3761 train_time:301910ms step_avg:145.92ms step:2080/3200 train_loss:3.4696 train_time:302058ms step_avg:145.92ms step:2081/3200 train_loss:3.7232 train_time:302200ms step_avg:145.92ms step:2082/3200 train_loss:3.3503 train_time:302343ms step_avg:145.92ms step:2083/3200 train_loss:3.6849 train_time:302486ms step_avg:145.92ms step:2084/3200 train_loss:3.3941 train_time:302628ms step_avg:145.92ms step:2085/3200 train_loss:3.3770 train_time:302771ms step_avg:145.91ms step:2086/3200 train_loss:3.6189 train_time:302918ms step_avg:145.91ms step:2087/3200 train_loss:3.5498 train_time:303064ms step_avg:145.91ms step:2088/3200 train_loss:3.5346 train_time:303208ms step_avg:145.91ms step:2089/3200 train_loss:3.5932 train_time:303353ms step_avg:145.91ms step:2090/3200 train_loss:3.5176 train_time:303674ms step_avg:146.00ms step:2091/3200 train_loss:3.5044 train_time:303816ms step_avg:146.00ms step:2092/3200 train_loss:3.4613 train_time:303959ms step_avg:145.99ms step:2093/3200 train_loss:3.5309 train_time:304102ms step_avg:145.99ms step:2094/3200 train_loss:3.4324 train_time:304244ms step_avg:145.99ms step:2095/3200 train_loss:3.2261 train_time:304387ms step_avg:145.99ms step:2096/3200 train_loss:3.4542 train_time:304533ms step_avg:145.99ms step:2097/3200 train_loss:3.6245 train_time:304681ms step_avg:145.99ms step:2098/3200 train_loss:3.4504 train_time:304824ms step_avg:145.99ms step:2099/3200 train_loss:3.3414 train_time:304968ms step_avg:145.99ms step:2100/3200 train_loss:3.4403 train_time:305114ms step_avg:145.99ms step:2101/3200 train_loss:3.4027 
train_time:305256ms step_avg:145.99ms step:2102/3200 train_loss:3.5419 train_time:305398ms step_avg:145.98ms step:2103/3200 train_loss:3.3741 train_time:305542ms step_avg:145.98ms step:2104/3200 train_loss:3.3496 train_time:305688ms step_avg:145.98ms step:2105/3200 train_loss:3.5997 train_time:305836ms step_avg:145.98ms step:2106/3200 train_loss:3.3312 train_time:305979ms step_avg:145.98ms step:2107/3200 train_loss:3.7309 train_time:306122ms step_avg:145.98ms step:2108/3200 train_loss:3.5701 train_time:306266ms step_avg:145.98ms step:2109/3200 train_loss:3.4655 train_time:306409ms step_avg:145.98ms step:2110/3200 train_loss:3.4910 train_time:306555ms step_avg:145.98ms step:2111/3200 train_loss:3.3103 train_time:306700ms step_avg:145.98ms step:2112/3200 train_loss:3.7916 train_time:306844ms step_avg:145.98ms step:2113/3200 train_loss:3.4846 train_time:306988ms step_avg:145.98ms step:2114/3200 train_loss:3.4187 train_time:307132ms step_avg:145.98ms step:2115/3200 train_loss:3.5292 train_time:307276ms step_avg:145.97ms step:2116/3200 train_loss:3.4905 train_time:307419ms step_avg:145.97ms step:2117/3200 train_loss:3.4801 train_time:307563ms step_avg:145.97ms step:2118/3200 train_loss:3.5314 train_time:307707ms step_avg:145.97ms step:2119/3200 train_loss:3.3884 train_time:307854ms step_avg:145.97ms step:2120/3200 train_loss:3.4532 train_time:307999ms step_avg:145.97ms step:2121/3200 train_loss:3.1487 train_time:308142ms step_avg:145.97ms step:2122/3200 train_loss:3.3473 train_time:308285ms step_avg:145.97ms step:2123/3200 train_loss:3.5120 train_time:308428ms step_avg:145.97ms step:2124/3200 train_loss:3.4253 train_time:308573ms step_avg:145.97ms step:2125/3200 train_loss:3.5874 train_time:308718ms step_avg:145.97ms step:2125/3200 val_loss:3.4613 train_time:308769ms step_avg:145.99ms step:2126/3200 train_loss:3.4472 train_time:308872ms step_avg:145.97ms step:2127/3200 train_loss:3.5610 train_time:309019ms step_avg:145.97ms step:2128/3200 train_loss:3.5445 
train_time:309162ms step_avg:145.97ms step:2129/3200 train_loss:3.4053 train_time:309305ms step_avg:145.97ms step:2130/3200 train_loss:3.3878 train_time:309447ms step_avg:145.97ms step:2131/3200 train_loss:3.4109 train_time:309590ms step_avg:145.96ms step:2132/3200 train_loss:3.5711 train_time:309734ms step_avg:145.96ms step:2133/3200 train_loss:3.4484 train_time:309879ms step_avg:145.96ms step:2134/3200 train_loss:3.3547 train_time:310024ms step_avg:145.96ms step:2135/3200 train_loss:3.4156 train_time:310169ms step_avg:145.96ms step:2136/3200 train_loss:3.5399 train_time:310314ms step_avg:145.96ms step:2137/3200 train_loss:3.5544 train_time:310457ms step_avg:145.96ms step:2138/3200 train_loss:3.4968 train_time:310600ms step_avg:145.96ms step:2139/3200 train_loss:3.4903 train_time:310744ms step_avg:145.96ms step:2140/3200 train_loss:3.4730 train_time:310889ms step_avg:145.96ms step:2141/3200 train_loss:3.5579 train_time:311034ms step_avg:145.96ms step:2142/3200 train_loss:3.8611 train_time:311179ms step_avg:145.96ms step:2143/3200 train_loss:3.3891 train_time:311322ms step_avg:145.96ms step:2144/3200 train_loss:3.4205 train_time:311465ms step_avg:145.95ms step:2145/3200 train_loss:3.4587 train_time:311608ms step_avg:145.95ms step:2146/3200 train_loss:3.5883 train_time:311753ms step_avg:145.95ms step:2147/3200 train_loss:3.5222 train_time:311898ms step_avg:145.95ms step:2148/3200 train_loss:3.9290 train_time:312042ms step_avg:145.95ms step:2149/3200 train_loss:3.4448 train_time:312186ms step_avg:145.95ms step:2150/3200 train_loss:3.4105 train_time:312332ms step_avg:145.95ms step:2151/3200 train_loss:3.4867 train_time:312476ms step_avg:145.95ms step:2152/3200 train_loss:3.5177 train_time:312619ms step_avg:145.95ms step:2153/3200 train_loss:3.4696 train_time:312763ms step_avg:145.95ms step:2154/3200 train_loss:3.4060 train_time:312907ms step_avg:145.95ms step:2155/3200 train_loss:3.6102 train_time:313054ms step_avg:145.95ms step:2156/3200 train_loss:3.2371 
train_time:313198ms step_avg:145.95ms step:2157/3200 train_loss:3.3965 train_time:313342ms step_avg:145.94ms step:2158/3200 train_loss:3.5283 train_time:313486ms step_avg:145.94ms step:2159/3200 train_loss:3.4771 train_time:313630ms step_avg:145.94ms step:2160/3200 train_loss:3.6277 train_time:313776ms step_avg:145.94ms step:2161/3200 train_loss:3.5457 train_time:313919ms step_avg:145.94ms step:2162/3200 train_loss:3.4690 train_time:314064ms step_avg:145.94ms step:2163/3200 train_loss:3.4418 train_time:314209ms step_avg:145.94ms step:2164/3200 train_loss:3.4363 train_time:314354ms step_avg:145.94ms step:2165/3200 train_loss:3.5225 train_time:314497ms step_avg:145.94ms step:2166/3200 train_loss:3.5422 train_time:314642ms step_avg:145.94ms step:2167/3200 train_loss:3.4718 train_time:314785ms step_avg:145.94ms step:2168/3200 train_loss:3.3665 train_time:314931ms step_avg:145.94ms step:2169/3200 train_loss:3.4547 train_time:315077ms step_avg:145.94ms step:2170/3200 train_loss:3.4877 train_time:315221ms step_avg:145.94ms step:2171/3200 train_loss:3.6202 train_time:315365ms step_avg:145.93ms step:2172/3200 train_loss:3.4079 train_time:315509ms step_avg:145.93ms step:2173/3200 train_loss:3.3997 train_time:315654ms step_avg:145.93ms step:2174/3200 train_loss:3.4136 train_time:315796ms step_avg:145.93ms step:2175/3200 train_loss:3.4618 train_time:315940ms step_avg:145.93ms step:2176/3200 train_loss:3.4256 train_time:316084ms step_avg:145.93ms step:2177/3200 train_loss:3.3988 train_time:316228ms step_avg:145.93ms step:2178/3200 train_loss:3.6160 train_time:316375ms step_avg:145.93ms step:2179/3200 train_loss:3.4423 train_time:316519ms step_avg:145.93ms step:2180/3200 train_loss:3.4563 train_time:316663ms step_avg:145.93ms step:2181/3200 train_loss:3.5079 train_time:316808ms step_avg:145.93ms step:2182/3200 train_loss:3.4893 train_time:316952ms step_avg:145.93ms step:2183/3200 train_loss:3.4493 train_time:317096ms step_avg:145.93ms step:2184/3200 train_loss:3.3526 
train_time:317241ms step_avg:145.92ms step:2185/3200 train_loss:3.5282 train_time:317385ms step_avg:145.92ms step:2186/3200 train_loss:3.6900 train_time:317529ms step_avg:145.92ms step:2187/3200 train_loss:3.3333 train_time:317675ms step_avg:145.92ms step:2188/3200 train_loss:3.3826 train_time:317818ms step_avg:145.92ms step:2189/3200 train_loss:3.2264 train_time:317963ms step_avg:145.92ms step:2190/3200 train_loss:3.3805 train_time:318106ms step_avg:145.92ms step:2191/3200 train_loss:3.5228 train_time:318250ms step_avg:145.92ms step:2192/3200 train_loss:3.4554 train_time:318395ms step_avg:145.92ms step:2193/3200 train_loss:3.6961 train_time:318538ms step_avg:145.92ms step:2194/3200 train_loss:3.4617 train_time:318682ms step_avg:145.92ms step:2195/3200 train_loss:3.5229 train_time:318825ms step_avg:145.92ms step:2196/3200 train_loss:3.4653 train_time:318970ms step_avg:145.91ms step:2197/3200 train_loss:3.3889 train_time:319115ms step_avg:145.91ms step:2198/3200 train_loss:3.4645 train_time:319259ms step_avg:145.91ms step:2199/3200 train_loss:3.4078 train_time:319403ms step_avg:145.91ms step:2200/3200 train_loss:3.4114 train_time:319545ms step_avg:145.91ms step:2201/3200 train_loss:3.4643 train_time:319691ms step_avg:145.91ms step:2202/3200 train_loss:3.4469 train_time:319835ms step_avg:145.91ms step:2203/3200 train_loss:3.4265 train_time:319979ms step_avg:145.91ms step:2204/3200 train_loss:3.9292 train_time:320122ms step_avg:145.91ms step:2205/3200 train_loss:3.3420 train_time:320267ms step_avg:145.91ms step:2206/3200 train_loss:3.4625 train_time:320413ms step_avg:145.91ms step:2207/3200 train_loss:3.4810 train_time:320557ms step_avg:145.91ms step:2208/3200 train_loss:3.4961 train_time:320702ms step_avg:145.91ms step:2209/3200 train_loss:3.3940 train_time:320846ms step_avg:145.91ms step:2210/3200 train_loss:3.4682 train_time:320990ms step_avg:145.90ms step:2211/3200 train_loss:3.4785 train_time:321136ms step_avg:145.90ms step:2212/3200 train_loss:3.4738 
train_time:321281ms step_avg:145.90ms step:2213/3200 train_loss:3.4941 train_time:321425ms step_avg:145.90ms step:2214/3200 train_loss:3.3578 train_time:321569ms step_avg:145.90ms step:2215/3200 train_loss:3.4200 train_time:321714ms step_avg:145.90ms step:2216/3200 train_loss:3.5621 train_time:321859ms step_avg:145.90ms step:2217/3200 train_loss:3.5137 train_time:322003ms step_avg:145.90ms step:2218/3200 train_loss:3.4714 train_time:322147ms step_avg:145.90ms step:2219/3200 train_loss:3.4832 train_time:322292ms step_avg:145.90ms step:2220/3200 train_loss:3.3833 train_time:322437ms step_avg:145.90ms step:2221/3200 train_loss:3.6521 train_time:322580ms step_avg:145.90ms step:2222/3200 train_loss:3.5322 train_time:322724ms step_avg:145.90ms step:2223/3200 train_loss:3.5614 train_time:322869ms step_avg:145.90ms step:2224/3200 train_loss:3.4426 train_time:323014ms step_avg:145.90ms step:2225/3200 train_loss:3.5686 train_time:323158ms step_avg:145.90ms step:2226/3200 train_loss:3.3212 train_time:323301ms step_avg:145.89ms step:2227/3200 train_loss:3.5904 train_time:323445ms step_avg:145.89ms step:2228/3200 train_loss:3.5307 train_time:323588ms step_avg:145.89ms step:2229/3200 train_loss:3.3285 train_time:323733ms step_avg:145.89ms step:2230/3200 train_loss:3.6769 train_time:323879ms step_avg:145.89ms step:2231/3200 train_loss:3.3653 train_time:324021ms step_avg:145.89ms step:2232/3200 train_loss:3.8298 train_time:324164ms step_avg:145.89ms step:2233/3200 train_loss:3.5205 train_time:324311ms step_avg:145.89ms step:2234/3200 train_loss:3.4691 train_time:324455ms step_avg:145.89ms step:2235/3200 train_loss:3.4912 train_time:324598ms step_avg:145.89ms step:2236/3200 train_loss:3.2744 train_time:324743ms step_avg:145.89ms step:2237/3200 train_loss:3.2805 train_time:324887ms step_avg:145.89ms step:2238/3200 train_loss:3.5053 train_time:325032ms step_avg:145.89ms step:2239/3200 train_loss:3.6043 train_time:325177ms step_avg:145.88ms step:2240/3200 train_loss:3.3203 
train_time:325322ms step_avg:145.88ms step:2241/3200 train_loss:3.3848 train_time:325464ms step_avg:145.88ms step:2242/3200 train_loss:3.5756 train_time:325610ms step_avg:145.88ms step:2243/3200 train_loss:3.5386 train_time:325755ms step_avg:145.88ms step:2244/3200 train_loss:3.3972 train_time:325899ms step_avg:145.88ms step:2245/3200 train_loss:3.4689 train_time:326043ms step_avg:145.88ms step:2246/3200 train_loss:3.4903 train_time:326186ms step_avg:145.88ms step:2247/3200 train_loss:3.3213 train_time:326331ms step_avg:145.88ms step:2248/3200 train_loss:3.3440 train_time:326477ms step_avg:145.88ms step:2249/3200 train_loss:3.6036 train_time:326620ms step_avg:145.88ms step:2250/3200 train_loss:3.3244 train_time:326764ms step_avg:145.88ms step:2250/3200 val_loss:3.4507 train_time:326815ms step_avg:145.90ms step:2251/3200 train_loss:3.3313 train_time:326917ms step_avg:145.88ms step:2252/3200 train_loss:3.3990 train_time:327066ms step_avg:145.88ms step:2253/3200 train_loss:3.3715 train_time:327211ms step_avg:145.88ms step:2254/3200 train_loss:3.4312 train_time:327353ms step_avg:145.88ms step:2255/3200 train_loss:3.4831 train_time:327495ms step_avg:145.88ms step:2256/3200 train_loss:3.3511 train_time:327638ms step_avg:145.88ms step:2257/3200 train_loss:3.6470 train_time:327782ms step_avg:145.88ms step:2258/3200 train_loss:3.5178 train_time:327928ms step_avg:145.88ms step:2259/3200 train_loss:3.8340 train_time:328074ms step_avg:145.88ms step:2260/3200 train_loss:3.5217 train_time:328221ms step_avg:145.88ms step:2261/3200 train_loss:3.5692 train_time:328366ms step_avg:145.88ms step:2262/3200 train_loss:3.4802 train_time:328508ms step_avg:145.87ms step:2263/3200 train_loss:3.4867 train_time:328650ms step_avg:145.87ms step:2264/3200 train_loss:3.2372 train_time:328792ms step_avg:145.87ms step:2265/3200 train_loss:3.3673 train_time:328941ms step_avg:145.87ms step:2266/3200 train_loss:3.5774 train_time:329085ms step_avg:145.87ms step:2267/3200 train_loss:3.3159 
train_time:329230ms step_avg:145.87ms step:2268/3200 train_loss:3.3816 train_time:329538ms step_avg:145.94ms step:2269/3200 train_loss:3.3638 train_time:329684ms step_avg:145.94ms step:2270/3200 train_loss:3.3306 train_time:329829ms step_avg:145.94ms step:2271/3200 train_loss:3.7315 train_time:329971ms step_avg:145.94ms step:2272/3200 train_loss:3.3840 train_time:330113ms step_avg:145.94ms step:2273/3200 train_loss:3.3867 train_time:330256ms step_avg:145.94ms step:2274/3200 train_loss:3.4737 train_time:330399ms step_avg:145.94ms step:2275/3200 train_loss:3.4182 train_time:330546ms step_avg:145.94ms step:2276/3200 train_loss:3.4359 train_time:330691ms step_avg:145.94ms step:2277/3200 train_loss:3.3192 train_time:330836ms step_avg:145.94ms step:2278/3200 train_loss:3.4289 train_time:330982ms step_avg:145.94ms step:2279/3200 train_loss:3.5514 train_time:331124ms step_avg:145.93ms step:2280/3200 train_loss:3.3518 train_time:331439ms step_avg:146.01ms step:2281/3200 train_loss:3.4079 train_time:331580ms step_avg:146.01ms step:2282/3200 train_loss:3.4275 train_time:331723ms step_avg:146.00ms step:2283/3200 train_loss:3.5653 train_time:331864ms step_avg:146.00ms step:2284/3200 train_loss:3.4399 train_time:332007ms step_avg:146.00ms step:2285/3200 train_loss:3.4632 train_time:332149ms step_avg:146.00ms step:2286/3200 train_loss:3.4603 train_time:332293ms step_avg:146.00ms step:2287/3200 train_loss:3.4606 train_time:332444ms step_avg:146.00ms step:2288/3200 train_loss:3.4180 train_time:332588ms step_avg:146.00ms step:2289/3200 train_loss:3.5486 train_time:332730ms step_avg:146.00ms step:2290/3200 train_loss:3.5162 train_time:332874ms step_avg:146.00ms step:2291/3200 train_loss:3.4041 train_time:333017ms step_avg:146.00ms step:2292/3200 train_loss:3.7472 train_time:333160ms step_avg:145.99ms step:2293/3200 train_loss:3.4071 train_time:333304ms step_avg:145.99ms step:2294/3200 train_loss:3.3572 train_time:333449ms step_avg:145.99ms step:2295/3200 train_loss:3.5370 
train_time:333595ms step_avg:145.99ms step:2296/3200 train_loss:3.4768 train_time:333739ms step_avg:145.99ms step:2297/3200 train_loss:3.4525 train_time:333883ms step_avg:145.99ms step:2298/3200 train_loss:3.8351 train_time:334026ms step_avg:145.99ms step:2299/3200 train_loss:3.3496 train_time:334168ms step_avg:145.99ms step:2300/3200 train_loss:3.3615 train_time:334311ms step_avg:145.99ms step:2301/3200 train_loss:3.6876 train_time:334458ms step_avg:145.99ms step:2302/3200 train_loss:3.4105 train_time:334605ms step_avg:145.99ms step:2303/3200 train_loss:3.4344 train_time:334749ms step_avg:145.99ms step:2304/3200 train_loss:3.4179 train_time:334893ms step_avg:145.99ms step:2305/3200 train_loss:3.3554 train_time:335037ms step_avg:145.99ms step:2306/3200 train_loss:3.5082 train_time:335181ms step_avg:145.98ms step:2307/3200 train_loss:3.3792 train_time:335325ms step_avg:145.98ms step:2308/3200 train_loss:3.3938 train_time:335469ms step_avg:145.98ms step:2309/3200 train_loss:3.5255 train_time:335613ms step_avg:145.98ms step:2310/3200 train_loss:3.4824 train_time:335758ms step_avg:145.98ms step:2311/3200 train_loss:3.3511 train_time:335903ms step_avg:145.98ms step:2312/3200 train_loss:3.4654 train_time:336048ms step_avg:145.98ms step:2313/3200 train_loss:3.5881 train_time:336191ms step_avg:145.98ms step:2314/3200 train_loss:3.4031 train_time:336335ms step_avg:145.98ms step:2315/3200 train_loss:3.3333 train_time:336481ms step_avg:145.98ms step:2316/3200 train_loss:3.4225 train_time:336625ms step_avg:145.98ms step:2317/3200 train_loss:3.3093 train_time:336768ms step_avg:145.98ms step:2318/3200 train_loss:3.4095 train_time:336912ms step_avg:145.98ms step:2319/3200 train_loss:3.4366 train_time:337056ms step_avg:145.97ms step:2320/3200 train_loss:3.2832 train_time:337202ms step_avg:145.97ms step:2321/3200 train_loss:3.4179 train_time:337346ms step_avg:145.97ms step:2322/3200 train_loss:3.4679 train_time:337489ms step_avg:145.97ms step:2323/3200 train_loss:3.3826 
train_time:337633ms step_avg:145.97ms step:2324/3200 train_loss:3.4317 train_time:337778ms step_avg:145.97ms step:2325/3200 train_loss:3.3466 train_time:337923ms step_avg:145.97ms step:2326/3200 train_loss:3.4938 train_time:338066ms step_avg:145.97ms step:2327/3200 train_loss:3.4947 train_time:338212ms step_avg:145.97ms step:2328/3200 train_loss:3.2713 train_time:338354ms step_avg:145.97ms step:2329/3200 train_loss:3.3838 train_time:338499ms step_avg:145.97ms step:2330/3200 train_loss:3.4080 train_time:338644ms step_avg:145.97ms step:2331/3200 train_loss:3.3813 train_time:338788ms step_avg:145.97ms step:2332/3200 train_loss:3.5714 train_time:338932ms step_avg:145.97ms step:2333/3200 train_loss:3.4401 train_time:339076ms step_avg:145.96ms step:2334/3200 train_loss:3.4213 train_time:339222ms step_avg:145.96ms step:2335/3200 train_loss:3.5052 train_time:339365ms step_avg:145.96ms step:2336/3200 train_loss:3.3416 train_time:339509ms step_avg:145.96ms step:2337/3200 train_loss:3.4928 train_time:339652ms step_avg:145.96ms step:2338/3200 train_loss:3.4541 train_time:339797ms step_avg:145.96ms step:2339/3200 train_loss:3.3947 train_time:339943ms step_avg:145.96ms step:2340/3200 train_loss:3.4767 train_time:340087ms step_avg:145.96ms step:2341/3200 train_loss:3.5243 train_time:340230ms step_avg:145.96ms step:2342/3200 train_loss:3.3954 train_time:340373ms step_avg:145.96ms step:2343/3200 train_loss:3.4088 train_time:340517ms step_avg:145.96ms step:2344/3200 train_loss:3.4739 train_time:340662ms step_avg:145.96ms step:2345/3200 train_loss:3.4053 train_time:340806ms step_avg:145.96ms step:2346/3200 train_loss:3.5285 train_time:340950ms step_avg:145.95ms step:2347/3200 train_loss:3.4381 train_time:341094ms step_avg:145.95ms step:2348/3200 train_loss:3.5491 train_time:341240ms step_avg:145.95ms step:2349/3200 train_loss:3.5067 train_time:341384ms step_avg:145.95ms step:2350/3200 train_loss:3.5459 train_time:341527ms step_avg:145.95ms step:2351/3200 train_loss:3.2429 
train_time:341670ms step_avg:145.95ms step:2352/3200 train_loss:3.3627 train_time:341814ms step_avg:145.95ms step:2353/3200 train_loss:3.3529 train_time:341961ms step_avg:145.95ms step:2354/3200 train_loss:3.5706 train_time:342105ms step_avg:145.95ms step:2355/3200 train_loss:3.3651 train_time:342248ms step_avg:145.95ms step:2356/3200 train_loss:3.3548 train_time:342392ms step_avg:145.95ms step:2357/3200 train_loss:3.5136 train_time:342537ms step_avg:145.95ms step:2358/3200 train_loss:3.3689 train_time:342681ms step_avg:145.95ms step:2359/3200 train_loss:3.4671 train_time:342826ms step_avg:145.95ms step:2360/3200 train_loss:3.3690 train_time:342970ms step_avg:145.94ms step:2361/3200 train_loss:3.3833 train_time:343115ms step_avg:145.94ms step:2362/3200 train_loss:3.4109 train_time:343261ms step_avg:145.94ms step:2363/3200 train_loss:3.4725 train_time:343404ms step_avg:145.94ms step:2364/3200 train_loss:3.4244 train_time:343547ms step_avg:145.94ms step:2365/3200 train_loss:3.8597 train_time:343690ms step_avg:145.94ms step:2366/3200 train_loss:3.4890 train_time:343835ms step_avg:145.94ms step:2367/3200 train_loss:3.6260 train_time:343980ms step_avg:145.94ms step:2368/3200 train_loss:3.4518 train_time:344124ms step_avg:145.94ms step:2369/3200 train_loss:3.4577 train_time:344268ms step_avg:145.94ms step:2370/3200 train_loss:3.4861 train_time:344412ms step_avg:145.94ms step:2371/3200 train_loss:3.3709 train_time:344554ms step_avg:145.94ms step:2372/3200 train_loss:3.6033 train_time:344699ms step_avg:145.94ms step:2373/3200 train_loss:3.4444 train_time:344844ms step_avg:145.93ms step:2374/3200 train_loss:4.0021 train_time:344986ms step_avg:145.93ms step:2375/3200 train_loss:3.4267 train_time:345130ms step_avg:145.93ms step:2375/3200 val_loss:3.4338 train_time:345181ms step_avg:145.95ms step:2376/3200 train_loss:3.3337 train_time:345283ms step_avg:145.94ms step:2377/3200 train_loss:3.4929 train_time:345431ms step_avg:145.94ms step:2378/3200 train_loss:3.4697 
train_time:345575ms step_avg:145.94ms step:2379/3200 train_loss:3.4804 train_time:345718ms step_avg:145.93ms step:2380/3200 train_loss:3.4585 train_time:345860ms step_avg:145.93ms step:2381/3200 train_loss:3.3560 train_time:346004ms step_avg:145.93ms step:2382/3200 train_loss:3.4600 train_time:346147ms step_avg:145.93ms step:2383/3200 train_loss:3.4767 train_time:346292ms step_avg:145.93ms step:2384/3200 train_loss:3.4244 train_time:346439ms step_avg:145.93ms step:2385/3200 train_loss:3.3535 train_time:346583ms step_avg:145.93ms step:2386/3200 train_loss:3.4632 train_time:346728ms step_avg:145.93ms step:2387/3200 train_loss:3.4179 train_time:346871ms step_avg:145.93ms step:2388/3200 train_loss:3.4240 train_time:347013ms step_avg:145.93ms step:2389/3200 train_loss:3.4561 train_time:347156ms step_avg:145.93ms step:2390/3200 train_loss:3.4355 train_time:347301ms step_avg:145.92ms step:2391/3200 train_loss:3.4404 train_time:347448ms step_avg:145.93ms step:2392/3200 train_loss:3.3148 train_time:347592ms step_avg:145.92ms step:2393/3200 train_loss:3.5385 train_time:347736ms step_avg:145.92ms step:2394/3200 train_loss:3.3650 train_time:347880ms step_avg:145.92ms step:2395/3200 train_loss:3.4790 train_time:348026ms step_avg:145.92ms step:2396/3200 train_loss:3.5845 train_time:348170ms step_avg:145.92ms step:2397/3200 train_loss:3.5945 train_time:348313ms step_avg:145.92ms step:2398/3200 train_loss:3.5559 train_time:348456ms step_avg:145.92ms step:2399/3200 train_loss:3.5210 train_time:348602ms step_avg:145.92ms step:2400/3200 train_loss:3.3921 train_time:348747ms step_avg:145.92ms step:2401/3200 train_loss:3.3938 train_time:348890ms step_avg:145.92ms step:2402/3200 train_loss:3.5010 train_time:349035ms step_avg:145.92ms step:2403/3200 train_loss:3.3347 train_time:349179ms step_avg:145.92ms step:2404/3200 train_loss:3.4626 train_time:349323ms step_avg:145.92ms step:2405/3200 train_loss:3.6844 train_time:349469ms step_avg:145.92ms step:2406/3200 train_loss:3.4084 
train_time:349612ms step_avg:145.91ms step:2407/3200 train_loss:3.5579 train_time:349755ms step_avg:145.91ms step:2408/3200 train_loss:3.4163 train_time:349900ms step_avg:145.91ms step:2409/3200 train_loss:3.3524 train_time:350045ms step_avg:145.91ms step:2410/3200 train_loss:3.4894 train_time:350189ms step_avg:145.91ms step:2411/3200 train_loss:3.2744 train_time:350333ms step_avg:145.91ms step:2412/3200 train_loss:3.7060 train_time:350477ms step_avg:145.91ms step:2413/3200 train_loss:3.3944 train_time:350621ms step_avg:145.91ms step:2414/3200 train_loss:3.4722 train_time:350766ms step_avg:145.91ms step:2415/3200 train_loss:3.3889 train_time:350910ms step_avg:145.91ms step:2416/3200 train_loss:3.4629 train_time:351053ms step_avg:145.91ms step:2417/3200 train_loss:3.2762 train_time:351196ms step_avg:145.91ms step:2418/3200 train_loss:3.2033 train_time:351342ms step_avg:145.91ms step:2419/3200 train_loss:3.5050 train_time:351487ms step_avg:145.91ms step:2420/3200 train_loss:3.3840 train_time:351632ms step_avg:145.91ms step:2421/3200 train_loss:3.4081 train_time:351775ms step_avg:145.90ms step:2422/3200 train_loss:3.5161 train_time:351918ms step_avg:145.90ms step:2423/3200 train_loss:3.5615 train_time:352061ms step_avg:145.90ms step:2424/3200 train_loss:3.3750 train_time:352208ms step_avg:145.90ms step:2425/3200 train_loss:3.4728 train_time:352351ms step_avg:145.90ms step:2426/3200 train_loss:3.4693 train_time:352494ms step_avg:145.90ms step:2427/3200 train_loss:3.3962 train_time:352638ms step_avg:145.90ms step:2428/3200 train_loss:3.3402 train_time:352783ms step_avg:145.90ms step:2429/3200 train_loss:3.4770 train_time:352929ms step_avg:145.90ms step:2430/3200 train_loss:3.3652 train_time:353073ms step_avg:145.90ms step:2431/3200 train_loss:3.4287 train_time:353216ms step_avg:145.90ms step:2432/3200 train_loss:3.4855 train_time:353360ms step_avg:145.90ms step:2433/3200 train_loss:3.4492 train_time:353507ms step_avg:145.90ms step:2434/3200 train_loss:3.3155 
train_time:353651ms step_avg:145.90ms step:2435/3200 train_loss:3.2892 train_time:353794ms step_avg:145.89ms step:2436/3200 train_loss:3.4523 train_time:353938ms step_avg:145.89ms step:2437/3200 train_loss:3.3049 train_time:354081ms step_avg:145.89ms step:2438/3200 train_loss:3.3836 train_time:354227ms step_avg:145.89ms step:2439/3200 train_loss:3.4810 train_time:354371ms step_avg:145.89ms step:2440/3200 train_loss:3.3977 train_time:354515ms step_avg:145.89ms step:2441/3200 train_loss:3.4813 train_time:354658ms step_avg:145.89ms step:2442/3200 train_loss:3.3721 train_time:354803ms step_avg:145.89ms step:2443/3200 train_loss:3.4226 train_time:354948ms step_avg:145.89ms step:2444/3200 train_loss:3.3076 train_time:355093ms step_avg:145.89ms step:2445/3200 train_loss:3.3211 train_time:355235ms step_avg:145.89ms step:2446/3200 train_loss:3.4830 train_time:355381ms step_avg:145.89ms step:2447/3200 train_loss:3.3524 train_time:355527ms step_avg:145.89ms step:2448/3200 train_loss:3.4153 train_time:355672ms step_avg:145.89ms step:2449/3200 train_loss:3.5816 train_time:355815ms step_avg:145.89ms step:2450/3200 train_loss:3.4122 train_time:355959ms step_avg:145.88ms step:2451/3200 train_loss:3.4762 train_time:356103ms step_avg:145.88ms step:2452/3200 train_loss:3.3871 train_time:356248ms step_avg:145.88ms step:2453/3200 train_loss:3.4862 train_time:356392ms step_avg:145.88ms step:2454/3200 train_loss:3.3796 train_time:356536ms step_avg:145.88ms step:2455/3200 train_loss:3.5116 train_time:356682ms step_avg:145.88ms step:2456/3200 train_loss:3.4427 train_time:356828ms step_avg:145.88ms step:2457/3200 train_loss:3.3672 train_time:357127ms step_avg:145.94ms step:2458/3200 train_loss:3.2845 train_time:357273ms step_avg:145.94ms step:2459/3200 train_loss:3.4188 train_time:357415ms step_avg:145.94ms step:2460/3200 train_loss:4.0161 train_time:357558ms step_avg:145.94ms step:2461/3200 train_loss:3.4765 train_time:357700ms step_avg:145.94ms step:2462/3200 train_loss:3.2940 
train_time:357843ms step_avg:145.94ms step:2463/3200 train_loss:3.4969 train_time:357989ms step_avg:145.94ms step:2464/3200 train_loss:3.4062 train_time:358138ms step_avg:145.94ms step:2465/3200 train_loss:3.6077 train_time:358283ms step_avg:145.94ms step:2466/3200 train_loss:3.7763 train_time:358427ms step_avg:145.94ms step:2467/3200 train_loss:3.5265 train_time:358570ms step_avg:145.94ms step:2468/3200 train_loss:3.3963 train_time:358712ms step_avg:145.94ms step:2469/3200 train_loss:3.5156 train_time:358854ms step_avg:145.94ms step:2470/3200 train_loss:3.5269 train_time:359209ms step_avg:146.02ms step:2471/3200 train_loss:3.3240 train_time:359350ms step_avg:146.02ms step:2472/3200 train_loss:3.4144 train_time:359494ms step_avg:146.02ms step:2473/3200 train_loss:3.4136 train_time:359636ms step_avg:146.02ms step:2474/3200 train_loss:3.5570 train_time:359779ms step_avg:146.01ms step:2475/3200 train_loss:3.6853 train_time:359922ms step_avg:146.01ms step:2476/3200 train_loss:3.2764 train_time:360065ms step_avg:146.01ms step:2477/3200 train_loss:3.4884 train_time:360212ms step_avg:146.01ms step:2478/3200 train_loss:3.4493 train_time:360356ms step_avg:146.01ms step:2479/3200 train_loss:3.2863 train_time:360500ms step_avg:146.01ms step:2480/3200 train_loss:3.2872 train_time:360645ms step_avg:146.01ms step:2481/3200 train_loss:3.4294 train_time:360789ms step_avg:146.01ms step:2482/3200 train_loss:3.4445 train_time:360933ms step_avg:146.01ms step:2483/3200 train_loss:3.4617 train_time:361076ms step_avg:146.01ms step:2484/3200 train_loss:3.4139 train_time:361220ms step_avg:146.01ms step:2485/3200 train_loss:3.4212 train_time:361368ms step_avg:146.01ms step:2486/3200 train_loss:3.3072 train_time:361512ms step_avg:146.01ms step:2487/3200 train_loss:3.5085 train_time:361654ms step_avg:146.00ms step:2488/3200 train_loss:3.4605 train_time:361799ms step_avg:146.00ms step:2489/3200 train_loss:3.3672 train_time:361944ms step_avg:146.00ms step:2490/3200 train_loss:3.4793 
train_time:362088ms step_avg:146.00ms step:2491/3200 train_loss:3.5184 train_time:362231ms step_avg:146.00ms step:2492/3200 train_loss:3.6061 train_time:362375ms step_avg:146.00ms step:2493/3200 train_loss:3.4589 train_time:362520ms step_avg:146.00ms step:2494/3200 train_loss:3.3845 train_time:362666ms step_avg:146.00ms step:2495/3200 train_loss:3.5057 train_time:362812ms step_avg:146.00ms step:2496/3200 train_loss:3.4593 train_time:362954ms step_avg:146.00ms step:2497/3200 train_loss:3.3591 train_time:363098ms step_avg:146.00ms step:2498/3200 train_loss:3.4632 train_time:363243ms step_avg:146.00ms step:2499/3200 train_loss:3.5157 train_time:363389ms step_avg:146.00ms step:2500/3200 train_loss:3.5356 train_time:363533ms step_avg:146.00ms step:2500/3200 val_loss:3.4098 train_time:363584ms step_avg:146.02ms step:2501/3200 train_loss:3.4765 train_time:363690ms step_avg:146.00ms step:2502/3200 train_loss:3.4369 train_time:363838ms step_avg:146.00ms step:2503/3200 train_loss:3.4506 train_time:363981ms step_avg:146.00ms step:2504/3200 train_loss:3.3176 train_time:364122ms step_avg:146.00ms step:2505/3200 train_loss:3.5170 train_time:364266ms step_avg:146.00ms step:2506/3200 train_loss:3.4642 train_time:364409ms step_avg:146.00ms step:2507/3200 train_loss:3.4097 train_time:364551ms step_avg:146.00ms step:2508/3200 train_loss:3.4118 train_time:364699ms step_avg:146.00ms step:2509/3200 train_loss:3.3701 train_time:364846ms step_avg:146.00ms step:2510/3200 train_loss:3.5501 train_time:364988ms step_avg:146.00ms step:2511/3200 train_loss:3.3735 train_time:365132ms step_avg:145.99ms step:2512/3200 train_loss:3.3618 train_time:365275ms step_avg:145.99ms step:2513/3200 train_loss:3.4464 train_time:365419ms step_avg:145.99ms step:2514/3200 train_loss:3.4710 train_time:365562ms step_avg:145.99ms step:2515/3200 train_loss:3.3699 train_time:365706ms step_avg:145.99ms step:2516/3200 train_loss:3.4582 train_time:365853ms step_avg:145.99ms step:2517/3200 train_loss:3.4510 
train_time:365999ms step_avg:145.99ms step:2518/3200 train_loss:3.3288 train_time:366143ms step_avg:145.99ms step:2519/3200 train_loss:3.3580 train_time:366286ms step_avg:145.99ms step:2520/3200 train_loss:3.4807 train_time:366428ms step_avg:145.99ms step:2521/3200 train_loss:3.4698 train_time:366572ms step_avg:145.99ms step:2522/3200 train_loss:3.3575 train_time:366719ms step_avg:145.99ms step:2523/3200 train_loss:3.3279 train_time:366864ms step_avg:145.99ms step:2524/3200 train_loss:3.4297 train_time:367008ms step_avg:145.99ms step:2525/3200 train_loss:3.2821 train_time:367153ms step_avg:145.99ms step:2526/3200 train_loss:3.4983 train_time:367298ms step_avg:145.99ms step:2527/3200 train_loss:3.4039 train_time:367441ms step_avg:145.98ms step:2528/3200 train_loss:3.4113 train_time:367584ms step_avg:145.98ms step:2529/3200 train_loss:3.3926 train_time:367728ms step_avg:145.98ms step:2530/3200 train_loss:3.4121 train_time:367874ms step_avg:145.98ms step:2531/3200 train_loss:3.4515 train_time:368019ms step_avg:145.98ms step:2532/3200 train_loss:3.2725 train_time:368163ms step_avg:145.98ms step:2533/3200 train_loss:3.4335 train_time:368307ms step_avg:145.98ms step:2534/3200 train_loss:3.3271 train_time:368451ms step_avg:145.98ms step:2535/3200 train_loss:3.3643 train_time:368596ms step_avg:145.98ms step:2536/3200 train_loss:3.4176 train_time:368739ms step_avg:145.98ms step:2537/3200 train_loss:3.4301 train_time:368885ms step_avg:145.98ms step:2538/3200 train_loss:3.2600 train_time:369028ms step_avg:145.98ms step:2539/3200 train_loss:3.5643 train_time:369175ms step_avg:145.98ms step:2540/3200 train_loss:3.2498 train_time:369320ms step_avg:145.98ms step:2541/3200 train_loss:3.4334 train_time:369463ms step_avg:145.98ms step:2542/3200 train_loss:3.1923 train_time:369607ms step_avg:145.97ms step:2543/3200 train_loss:3.6340 train_time:369752ms step_avg:145.97ms step:2544/3200 train_loss:3.4046 train_time:369898ms step_avg:145.97ms step:2545/3200 train_loss:3.5575 
train_time:370042ms step_avg:145.97ms step:2546/3200 train_loss:3.3975 train_time:370185ms step_avg:145.97ms step:2547/3200 train_loss:3.3758 train_time:370327ms step_avg:145.97ms step:2548/3200 train_loss:3.3829 train_time:370474ms step_avg:145.97ms step:2549/3200 train_loss:3.5456 train_time:370620ms step_avg:145.97ms step:2550/3200 train_loss:3.3956 train_time:370763ms step_avg:145.97ms step:2551/3200 train_loss:3.3962 train_time:370907ms step_avg:145.97ms step:2552/3200 train_loss:3.4244 train_time:371051ms step_avg:145.97ms step:2553/3200 train_loss:3.4501 train_time:371194ms step_avg:145.97ms step:2554/3200 train_loss:3.3527 train_time:371339ms step_avg:145.97ms step:2555/3200 train_loss:3.4629 train_time:371483ms step_avg:145.97ms step:2556/3200 train_loss:3.5146 train_time:371626ms step_avg:145.96ms step:2557/3200 train_loss:3.5062 train_time:371777ms step_avg:145.97ms step:2558/3200 train_loss:3.3498 train_time:371916ms step_avg:145.96ms step:2559/3200 train_loss:3.3529 train_time:372061ms step_avg:145.96ms step:2560/3200 train_loss:3.3569 train_time:372204ms step_avg:145.96ms step:2561/3200 train_loss:3.4843 train_time:372348ms step_avg:145.96ms step:2562/3200 train_loss:3.5165 train_time:372493ms step_avg:145.96ms step:2563/3200 train_loss:3.3967 train_time:372639ms step_avg:145.96ms step:2564/3200 train_loss:3.4264 train_time:372784ms step_avg:145.96ms step:2565/3200 train_loss:3.3436 train_time:372927ms step_avg:145.96ms step:2566/3200 train_loss:3.3572 train_time:373071ms step_avg:145.96ms step:2567/3200 train_loss:3.3551 train_time:373218ms step_avg:145.96ms step:2568/3200 train_loss:3.4010 train_time:373361ms step_avg:145.96ms step:2569/3200 train_loss:3.5408 train_time:373506ms step_avg:145.96ms step:2570/3200 train_loss:3.4439 train_time:373650ms step_avg:145.96ms step:2571/3200 train_loss:3.5255 train_time:373795ms step_avg:145.96ms step:2572/3200 train_loss:3.2894 train_time:373939ms step_avg:145.96ms step:2573/3200 train_loss:3.3907 
train_time:374084ms step_avg:145.96ms step:2574/3200 train_loss:3.0417 train_time:374226ms step_avg:145.95ms step:2575/3200 train_loss:3.3029 train_time:374370ms step_avg:145.95ms step:2576/3200 train_loss:3.2371 train_time:374514ms step_avg:145.95ms step:2577/3200 train_loss:3.3494 train_time:374658ms step_avg:145.95ms step:2578/3200 train_loss:3.4063 train_time:374802ms step_avg:145.95ms step:2579/3200 train_loss:3.3131 train_time:374946ms step_avg:145.95ms step:2580/3200 train_loss:3.3701 train_time:375089ms step_avg:145.95ms step:2581/3200 train_loss:3.3109 train_time:375232ms step_avg:145.95ms step:2582/3200 train_loss:3.4242 train_time:375379ms step_avg:145.95ms step:2583/3200 train_loss:3.3012 train_time:375522ms step_avg:145.95ms step:2584/3200 train_loss:3.4926 train_time:375666ms step_avg:145.95ms step:2585/3200 train_loss:3.3995 train_time:375811ms step_avg:145.95ms step:2586/3200 train_loss:3.4124 train_time:375955ms step_avg:145.95ms step:2587/3200 train_loss:3.5404 train_time:376103ms step_avg:145.95ms step:2588/3200 train_loss:3.4264 train_time:376244ms step_avg:145.94ms step:2589/3200 train_loss:3.2876 train_time:376387ms step_avg:145.94ms step:2590/3200 train_loss:3.4587 train_time:376531ms step_avg:145.94ms step:2591/3200 train_loss:3.3611 train_time:376677ms step_avg:145.94ms step:2592/3200 train_loss:3.5714 train_time:376820ms step_avg:145.94ms step:2593/3200 train_loss:3.4325 train_time:376963ms step_avg:145.94ms step:2594/3200 train_loss:3.2581 train_time:377107ms step_avg:145.94ms step:2595/3200 train_loss:3.3244 train_time:377250ms step_avg:145.94ms step:2596/3200 train_loss:3.7609 train_time:377395ms step_avg:145.94ms step:2597/3200 train_loss:3.4181 train_time:377540ms step_avg:145.94ms step:2598/3200 train_loss:3.4126 train_time:377684ms step_avg:145.94ms step:2599/3200 train_loss:3.2712 train_time:377828ms step_avg:145.94ms step:2600/3200 train_loss:3.5085 train_time:377973ms step_avg:145.94ms step:2601/3200 train_loss:3.6741 
train_time:378120ms step_avg:145.94ms step:2602/3200 train_loss:3.2545 train_time:378263ms step_avg:145.93ms step:2603/3200 train_loss:3.3942 train_time:378406ms step_avg:145.93ms step:2604/3200 train_loss:3.2367 train_time:378550ms step_avg:145.93ms step:2605/3200 train_loss:3.5314 train_time:378694ms step_avg:145.93ms step:2606/3200 train_loss:3.3907 train_time:378838ms step_avg:145.93ms step:2607/3200 train_loss:3.2813 train_time:378984ms step_avg:145.93ms step:2608/3200 train_loss:3.2493 train_time:379126ms step_avg:145.93ms step:2609/3200 train_loss:3.3580 train_time:379271ms step_avg:145.93ms step:2610/3200 train_loss:3.5393 train_time:379417ms step_avg:145.93ms step:2611/3200 train_loss:3.4100 train_time:379561ms step_avg:145.93ms step:2612/3200 train_loss:3.2487 train_time:379705ms step_avg:145.93ms step:2613/3200 train_loss:3.3380 train_time:379850ms step_avg:145.93ms step:2614/3200 train_loss:3.4491 train_time:379995ms step_avg:145.93ms step:2615/3200 train_loss:3.3838 train_time:380141ms step_avg:145.93ms step:2616/3200 train_loss:3.3828 train_time:380285ms step_avg:145.93ms step:2617/3200 train_loss:3.4234 train_time:380427ms step_avg:145.93ms step:2618/3200 train_loss:3.4555 train_time:380571ms step_avg:145.92ms step:2619/3200 train_loss:3.3081 train_time:380718ms step_avg:145.92ms step:2620/3200 train_loss:3.4802 train_time:380862ms step_avg:145.92ms step:2621/3200 train_loss:3.4396 train_time:381006ms step_avg:145.92ms step:2622/3200 train_loss:3.5686 train_time:381150ms step_avg:145.92ms step:2623/3200 train_loss:3.4788 train_time:381295ms step_avg:145.92ms step:2624/3200 train_loss:3.4024 train_time:381440ms step_avg:145.92ms step:2625/3200 train_loss:3.3539 train_time:381584ms step_avg:145.92ms step:2625/3200 val_loss:3.3860 train_time:381634ms step_avg:145.94ms step:2626/3200 train_loss:3.3797 train_time:381739ms step_avg:145.92ms step:2627/3200 train_loss:3.4425 train_time:381885ms step_avg:145.92ms step:2628/3200 train_loss:3.2547 
train_time:382028ms step_avg:145.92ms step:2629/3200 train_loss:3.5295 train_time:382171ms step_avg:145.92ms step:2630/3200 train_loss:3.4101 train_time:382313ms step_avg:145.92ms step:2631/3200 train_loss:3.4560 train_time:382456ms step_avg:145.92ms step:2632/3200 train_loss:3.6843 train_time:382599ms step_avg:145.92ms step:2633/3200 train_loss:3.4362 train_time:382747ms step_avg:145.92ms step:2634/3200 train_loss:3.3547 train_time:382893ms step_avg:145.92ms step:2635/3200 train_loss:3.3198 train_time:383038ms step_avg:145.92ms step:2636/3200 train_loss:3.3655 train_time:383183ms step_avg:145.92ms step:2637/3200 train_loss:3.1550 train_time:383326ms step_avg:145.92ms step:2638/3200 train_loss:3.4616 train_time:383469ms step_avg:145.92ms step:2639/3200 train_loss:3.4367 train_time:383612ms step_avg:145.92ms step:2640/3200 train_loss:3.3277 train_time:383757ms step_avg:145.92ms step:2641/3200 train_loss:3.4122 train_time:383905ms step_avg:145.92ms step:2642/3200 train_loss:3.4443 train_time:384049ms step_avg:145.92ms step:2643/3200 train_loss:3.2403 train_time:384191ms step_avg:145.91ms step:2644/3200 train_loss:3.3619 train_time:384337ms step_avg:145.91ms step:2645/3200 train_loss:3.4347 train_time:384481ms step_avg:145.91ms step:2646/3200 train_loss:3.3939 train_time:384788ms step_avg:145.97ms step:2647/3200 train_loss:3.2935 train_time:384935ms step_avg:145.97ms step:2648/3200 train_loss:3.5067 train_time:385078ms step_avg:145.97ms step:2649/3200 train_loss:3.7625 train_time:385223ms step_avg:145.97ms step:2650/3200 train_loss:3.4035 train_time:385366ms step_avg:145.97ms step:2651/3200 train_loss:3.3700 train_time:385508ms step_avg:145.97ms step:2652/3200 train_loss:3.5002 train_time:385652ms step_avg:145.97ms step:2653/3200 train_loss:3.3326 train_time:385802ms step_avg:145.97ms step:2654/3200 train_loss:3.3222 train_time:385947ms step_avg:145.97ms step:2655/3200 train_loss:3.3969 train_time:386090ms step_avg:145.97ms step:2656/3200 train_loss:3.3129 
train_time:386234ms step_avg:145.97ms step:2657/3200 train_loss:3.3446 train_time:386378ms step_avg:145.97ms step:2658/3200 train_loss:3.3170 train_time:386522ms step_avg:145.97ms step:2659/3200 train_loss:3.4054 train_time:386666ms step_avg:145.97ms step:2660/3200 train_loss:3.5417 train_time:386990ms step_avg:146.03ms step:2661/3200 train_loss:3.3384 train_time:387131ms step_avg:146.03ms step:2662/3200 train_loss:3.4841 train_time:387272ms step_avg:146.03ms step:2663/3200 train_loss:3.3521 train_time:387415ms step_avg:146.03ms step:2664/3200 train_loss:3.3502 train_time:387559ms step_avg:146.03ms step:2665/3200 train_loss:3.2820 train_time:387702ms step_avg:146.03ms step:2666/3200 train_loss:3.3290 train_time:387846ms step_avg:146.03ms step:2667/3200 train_loss:3.3701 train_time:387993ms step_avg:146.03ms step:2668/3200 train_loss:3.4080 train_time:388139ms step_avg:146.03ms step:2669/3200 train_loss:3.3230 train_time:388284ms step_avg:146.03ms step:2670/3200 train_loss:3.3833 train_time:388428ms step_avg:146.03ms step:2671/3200 train_loss:3.2714 train_time:388570ms step_avg:146.02ms step:2672/3200 train_loss:3.3363 train_time:388713ms step_avg:146.02ms step:2673/3200 train_loss:3.3259 train_time:388857ms step_avg:146.02ms step:2674/3200 train_loss:3.3936 train_time:389004ms step_avg:146.02ms step:2675/3200 train_loss:3.4142 train_time:389148ms step_avg:146.02ms step:2676/3200 train_loss:3.3738 train_time:389291ms step_avg:146.02ms step:2677/3200 train_loss:3.3677 train_time:389434ms step_avg:146.02ms step:2678/3200 train_loss:3.4014 train_time:389577ms step_avg:146.02ms step:2679/3200 train_loss:3.4458 train_time:389722ms step_avg:146.02ms step:2680/3200 train_loss:3.3505 train_time:389866ms step_avg:146.02ms step:2681/3200 train_loss:3.2800 train_time:390010ms step_avg:146.02ms step:2682/3200 train_loss:3.3215 train_time:390154ms step_avg:146.02ms step:2683/3200 train_loss:3.7933 train_time:390299ms step_avg:146.02ms step:2684/3200 train_loss:3.3823 
train_time:390444ms step_avg:146.02ms step:2685/3200 train_loss:3.4144 train_time:390588ms step_avg:146.01ms step:2686/3200 train_loss:3.4572 train_time:390732ms step_avg:146.01ms step:2687/3200 train_loss:3.3769 train_time:390877ms step_avg:146.01ms step:2688/3200 train_loss:3.4535 train_time:391023ms step_avg:146.01ms step:2689/3200 train_loss:3.3885 train_time:391167ms step_avg:146.01ms step:2690/3200 train_loss:3.3704 train_time:391311ms step_avg:146.01ms step:2691/3200 train_loss:3.4030 train_time:391454ms step_avg:146.01ms step:2692/3200 train_loss:3.4685 train_time:391597ms step_avg:146.01ms step:2693/3200 train_loss:3.2667 train_time:391743ms step_avg:146.01ms step:2694/3200 train_loss:3.6518 train_time:391886ms step_avg:146.01ms step:2695/3200 train_loss:3.4473 train_time:392030ms step_avg:146.01ms step:2696/3200 train_loss:3.2389 train_time:392173ms step_avg:146.01ms step:2697/3200 train_loss:3.4389 train_time:392318ms step_avg:146.01ms step:2698/3200 train_loss:3.4007 train_time:392464ms step_avg:146.01ms step:2699/3200 train_loss:3.3521 train_time:392607ms step_avg:146.00ms step:2700/3200 train_loss:3.4560 train_time:392751ms step_avg:146.00ms step:2701/3200 train_loss:3.4271 train_time:392895ms step_avg:146.00ms step:2702/3200 train_loss:3.3288 train_time:393041ms step_avg:146.00ms step:2703/3200 train_loss:3.3556 train_time:393185ms step_avg:146.00ms step:2704/3200 train_loss:3.3634 train_time:393329ms step_avg:146.00ms step:2705/3200 train_loss:3.3341 train_time:393472ms step_avg:146.00ms step:2706/3200 train_loss:3.4969 train_time:393617ms step_avg:146.00ms step:2707/3200 train_loss:3.4650 train_time:393760ms step_avg:146.00ms step:2708/3200 train_loss:3.3706 train_time:393904ms step_avg:146.00ms step:2709/3200 train_loss:3.3643 train_time:394048ms step_avg:146.00ms step:2710/3200 train_loss:3.4677 train_time:394191ms step_avg:146.00ms step:2711/3200 train_loss:3.3471 train_time:394336ms step_avg:146.00ms step:2712/3200 train_loss:3.4642 
train_time:394483ms step_avg:146.00ms step:2713/3200 train_loss:3.1998 train_time:394626ms step_avg:146.00ms step:2714/3200 train_loss:3.3964 train_time:394769ms step_avg:145.99ms step:2715/3200 train_loss:3.2839 train_time:394912ms step_avg:145.99ms step:2716/3200 train_loss:3.3012 train_time:395055ms step_avg:145.99ms step:2717/3200 train_loss:3.4874 train_time:395201ms step_avg:145.99ms step:2718/3200 train_loss:3.3907 train_time:395346ms step_avg:145.99ms step:2719/3200 train_loss:3.6142 train_time:395489ms step_avg:145.99ms step:2720/3200 train_loss:3.3634 train_time:395633ms step_avg:145.99ms step:2721/3200 train_loss:3.3532 train_time:395779ms step_avg:145.99ms step:2722/3200 train_loss:3.5836 train_time:395925ms step_avg:145.99ms step:2723/3200 train_loss:3.3518 train_time:396069ms step_avg:145.99ms step:2724/3200 train_loss:3.5237 train_time:396212ms step_avg:145.99ms step:2725/3200 train_loss:3.4081 train_time:396355ms step_avg:145.99ms step:2726/3200 train_loss:3.3666 train_time:396499ms step_avg:145.99ms step:2727/3200 train_loss:3.3776 train_time:396644ms step_avg:145.99ms step:2728/3200 train_loss:3.7098 train_time:396787ms step_avg:145.98ms step:2729/3200 train_loss:3.4428 train_time:396932ms step_avg:145.98ms step:2730/3200 train_loss:3.3034 train_time:397076ms step_avg:145.98ms step:2731/3200 train_loss:3.4133 train_time:397223ms step_avg:145.98ms step:2732/3200 train_loss:3.3267 train_time:397366ms step_avg:145.98ms step:2733/3200 train_loss:3.2098 train_time:397510ms step_avg:145.98ms step:2734/3200 train_loss:3.3236 train_time:397653ms step_avg:145.98ms step:2735/3200 train_loss:3.3967 train_time:397798ms step_avg:145.98ms step:2736/3200 train_loss:3.2806 train_time:397945ms step_avg:145.98ms step:2737/3200 train_loss:3.6899 train_time:398088ms step_avg:145.98ms step:2738/3200 train_loss:3.4359 train_time:398232ms step_avg:145.98ms step:2739/3200 train_loss:3.6307 train_time:398377ms step_avg:145.98ms step:2740/3200 train_loss:3.3768 
train_time:398522ms step_avg:145.98ms step:2741/3200 train_loss:3.3765 train_time:398666ms step_avg:145.98ms step:2742/3200 train_loss:3.3200 train_time:398811ms step_avg:145.98ms step:2743/3200 train_loss:3.3871 train_time:398955ms step_avg:145.98ms step:2744/3200 train_loss:3.3954 train_time:399100ms step_avg:145.98ms step:2745/3200 train_loss:3.5074 train_time:399245ms step_avg:145.98ms step:2746/3200 train_loss:3.2687 train_time:399387ms step_avg:145.97ms step:2747/3200 train_loss:3.3570 train_time:399531ms step_avg:145.97ms step:2748/3200 train_loss:3.3961 train_time:399675ms step_avg:145.97ms step:2749/3200 train_loss:3.5056 train_time:399820ms step_avg:145.97ms step:2750/3200 train_loss:3.3462 train_time:399966ms step_avg:145.97ms step:2750/3200 val_loss:3.3647 train_time:400017ms step_avg:145.99ms step:2751/3200 train_loss:3.4317 train_time:400122ms step_avg:145.98ms step:2752/3200 train_loss:3.4788 train_time:400268ms step_avg:145.98ms step:2753/3200 train_loss:3.3871 train_time:400415ms step_avg:145.98ms step:2754/3200 train_loss:3.3184 train_time:400556ms step_avg:145.98ms step:2755/3200 train_loss:3.3164 train_time:400698ms step_avg:145.97ms step:2756/3200 train_loss:3.3970 train_time:400840ms step_avg:145.97ms step:2757/3200 train_loss:3.3390 train_time:400983ms step_avg:145.97ms step:2758/3200 train_loss:3.2158 train_time:401130ms step_avg:145.97ms step:2759/3200 train_loss:3.6080 train_time:401276ms step_avg:145.97ms step:2760/3200 train_loss:3.4203 train_time:401420ms step_avg:145.97ms step:2761/3200 train_loss:3.3870 train_time:401564ms step_avg:145.97ms step:2762/3200 train_loss:3.3531 train_time:401707ms step_avg:145.97ms step:2763/3200 train_loss:3.2593 train_time:401850ms step_avg:145.97ms step:2764/3200 train_loss:3.4290 train_time:401995ms step_avg:145.97ms step:2765/3200 train_loss:3.3579 train_time:402139ms step_avg:145.97ms step:2766/3200 train_loss:3.2496 train_time:402284ms step_avg:145.97ms step:2767/3200 train_loss:3.3461 
train_time:402429ms step_avg:145.97ms step:2768/3200 train_loss:3.4214 train_time:402575ms step_avg:145.97ms step:2769/3200 train_loss:3.2987 train_time:402720ms step_avg:145.97ms step:2770/3200 train_loss:3.3810 train_time:402863ms step_avg:145.96ms step:2771/3200 train_loss:3.3537 train_time:403008ms step_avg:145.96ms step:2772/3200 train_loss:3.7903 train_time:403154ms step_avg:145.96ms step:2773/3200 train_loss:3.2665 train_time:403300ms step_avg:145.96ms step:2774/3200 train_loss:3.4065 train_time:403444ms step_avg:145.96ms step:2775/3200 train_loss:3.4589 train_time:403588ms step_avg:145.96ms step:2776/3200 train_loss:3.4250 train_time:403734ms step_avg:145.96ms step:2777/3200 train_loss:3.4946 train_time:403877ms step_avg:145.96ms step:2778/3200 train_loss:3.5047 train_time:404020ms step_avg:145.96ms step:2779/3200 train_loss:3.3773 train_time:404164ms step_avg:145.96ms step:2780/3200 train_loss:3.2383 train_time:404310ms step_avg:145.96ms step:2781/3200 train_loss:3.3844 train_time:404455ms step_avg:145.96ms step:2782/3200 train_loss:3.4128 train_time:404599ms step_avg:145.96ms step:2783/3200 train_loss:3.2803 train_time:404742ms step_avg:145.96ms step:2784/3200 train_loss:3.3738 train_time:404887ms step_avg:145.96ms step:2785/3200 train_loss:3.4366 train_time:405031ms step_avg:145.96ms step:2786/3200 train_loss:3.3181 train_time:405175ms step_avg:145.96ms step:2787/3200 train_loss:3.4316 train_time:405319ms step_avg:145.96ms step:2788/3200 train_loss:3.3947 train_time:405464ms step_avg:145.96ms step:2789/3200 train_loss:3.3281 train_time:405610ms step_avg:145.96ms step:2790/3200 train_loss:3.4156 train_time:405755ms step_avg:145.96ms step:2791/3200 train_loss:3.3434 train_time:405899ms step_avg:145.95ms step:2792/3200 train_loss:3.2456 train_time:406043ms step_avg:145.95ms step:2793/3200 train_loss:3.3365 train_time:406186ms step_avg:145.95ms step:2794/3200 train_loss:3.3838 train_time:406332ms step_avg:145.95ms step:2795/3200 train_loss:3.3045 
train_time:406476ms step_avg:145.95ms step:2796/3200 train_loss:3.3388 train_time:406620ms step_avg:145.95ms step:2797/3200 train_loss:3.2528 train_time:406764ms step_avg:145.95ms step:2798/3200 train_loss:3.3632 train_time:406909ms step_avg:145.95ms step:2799/3200 train_loss:3.3223 train_time:407054ms step_avg:145.95ms step:2800/3200 train_loss:3.4835 train_time:407198ms step_avg:145.95ms step:2801/3200 train_loss:3.4286 train_time:407341ms step_avg:145.95ms step:2802/3200 train_loss:3.4094 train_time:407485ms step_avg:145.95ms step:2803/3200 train_loss:3.3530 train_time:407630ms step_avg:145.95ms step:2804/3200 train_loss:3.5237 train_time:407776ms step_avg:145.95ms step:2805/3200 train_loss:3.4970 train_time:407920ms step_avg:145.95ms step:2806/3200 train_loss:3.2273 train_time:408063ms step_avg:145.95ms step:2807/3200 train_loss:3.6238 train_time:408207ms step_avg:145.94ms step:2808/3200 train_loss:3.3685 train_time:408352ms step_avg:145.94ms step:2809/3200 train_loss:3.3015 train_time:408496ms step_avg:145.94ms step:2810/3200 train_loss:3.3148 train_time:408640ms step_avg:145.94ms step:2811/3200 train_loss:3.4822 train_time:408784ms step_avg:145.94ms step:2812/3200 train_loss:3.4670 train_time:408928ms step_avg:145.94ms step:2813/3200 train_loss:3.2182 train_time:409074ms step_avg:145.94ms step:2814/3200 train_loss:3.4460 train_time:409218ms step_avg:145.94ms step:2815/3200 train_loss:3.5107 train_time:409361ms step_avg:145.94ms step:2816/3200 train_loss:3.3186 train_time:409506ms step_avg:145.94ms step:2817/3200 train_loss:2.9516 train_time:409651ms step_avg:145.94ms step:2818/3200 train_loss:3.3412 train_time:409795ms step_avg:145.94ms step:2819/3200 train_loss:3.3071 train_time:409939ms step_avg:145.94ms step:2820/3200 train_loss:3.5011 train_time:410083ms step_avg:145.94ms step:2821/3200 train_loss:3.3540 train_time:410227ms step_avg:145.94ms step:2822/3200 train_loss:3.4237 train_time:410372ms step_avg:145.94ms step:2823/3200 train_loss:3.3651 
train_time:410517ms step_avg:145.94ms step:2824/3200 train_loss:3.3287 train_time:410660ms step_avg:145.93ms step:2825/3200 train_loss:3.2321 train_time:410804ms step_avg:145.93ms step:2826/3200 train_loss:3.4917 train_time:410947ms step_avg:145.93ms step:2827/3200 train_loss:3.3808 train_time:411093ms step_avg:145.93ms step:2828/3200 train_loss:3.2714 train_time:411237ms step_avg:145.93ms step:2829/3200 train_loss:3.4053 train_time:411381ms step_avg:145.93ms step:2830/3200 train_loss:3.3933 train_time:411524ms step_avg:145.93ms step:2831/3200 train_loss:3.3287 train_time:411669ms step_avg:145.93ms step:2832/3200 train_loss:3.4821 train_time:411815ms step_avg:145.93ms step:2833/3200 train_loss:3.3976 train_time:411958ms step_avg:145.93ms step:2834/3200 train_loss:3.3771 train_time:412102ms step_avg:145.93ms step:2835/3200 train_loss:3.1936 train_time:412404ms step_avg:145.98ms step:2836/3200 train_loss:3.4117 train_time:412552ms step_avg:145.98ms step:2837/3200 train_loss:3.3460 train_time:412693ms step_avg:145.98ms step:2838/3200 train_loss:3.6471 train_time:412837ms step_avg:145.98ms step:2839/3200 train_loss:3.3056 train_time:412980ms step_avg:145.98ms step:2840/3200 train_loss:3.3106 train_time:413123ms step_avg:145.98ms step:2841/3200 train_loss:3.3626 train_time:413266ms step_avg:145.98ms step:2842/3200 train_loss:3.3008 train_time:413415ms step_avg:145.98ms step:2843/3200 train_loss:3.3006 train_time:413560ms step_avg:145.98ms step:2844/3200 train_loss:3.4692 train_time:413703ms step_avg:145.98ms step:2845/3200 train_loss:3.3542 train_time:413847ms step_avg:145.98ms step:2846/3200 train_loss:3.3854 train_time:413991ms step_avg:145.98ms step:2847/3200 train_loss:3.3412 train_time:414135ms step_avg:145.98ms step:2848/3200 train_loss:3.6030 train_time:414278ms step_avg:145.98ms step:2849/3200 train_loss:3.2692 train_time:414422ms step_avg:145.97ms step:2850/3200 train_loss:3.3117 train_time:414744ms step_avg:146.04ms step:2851/3200 train_loss:3.4075 
train_time:414885ms step_avg:146.03ms step:2852/3200 train_loss:3.3869 train_time:415028ms step_avg:146.03ms step:2853/3200 train_loss:3.3414 train_time:415172ms step_avg:146.03ms step:2854/3200 train_loss:3.4140 train_time:415315ms step_avg:146.03ms step:2855/3200 train_loss:3.2427 train_time:415457ms step_avg:146.03ms step:2856/3200 train_loss:3.2623 train_time:415602ms step_avg:146.03ms step:2857/3200 train_loss:3.3639 train_time:415753ms step_avg:146.03ms step:2858/3200 train_loss:3.3547 train_time:415898ms step_avg:146.03ms step:2859/3200 train_loss:3.2510 train_time:416040ms step_avg:146.03ms step:2860/3200 train_loss:3.3433 train_time:416184ms step_avg:146.03ms step:2861/3200 train_loss:3.3089 train_time:416327ms step_avg:146.03ms step:2862/3200 train_loss:3.3392 train_time:416470ms step_avg:146.03ms step:2863/3200 train_loss:3.3908 train_time:416614ms step_avg:146.03ms step:2864/3200 train_loss:3.6509 train_time:416759ms step_avg:146.03ms step:2865/3200 train_loss:3.4538 train_time:416906ms step_avg:146.03ms step:2866/3200 train_loss:3.3494 train_time:417050ms step_avg:146.03ms step:2867/3200 train_loss:3.2244 train_time:417193ms step_avg:146.02ms step:2868/3200 train_loss:3.4361 train_time:417337ms step_avg:146.02ms step:2869/3200 train_loss:3.3911 train_time:417479ms step_avg:146.02ms step:2870/3200 train_loss:3.3450 train_time:417623ms step_avg:146.02ms step:2871/3200 train_loss:3.4860 train_time:417769ms step_avg:146.02ms step:2872/3200 train_loss:3.2509 train_time:417916ms step_avg:146.02ms step:2873/3200 train_loss:3.3262 train_time:418059ms step_avg:146.02ms step:2874/3200 train_loss:3.1986 train_time:418202ms step_avg:146.02ms step:2875/3200 train_loss:3.3464 train_time:418346ms step_avg:146.02ms step:2875/3200 val_loss:3.3446 train_time:418397ms step_avg:146.04ms step:2876/3200 train_loss:3.2621 train_time:418500ms step_avg:146.02ms step:2877/3200 train_loss:3.2519 train_time:418647ms step_avg:146.02ms step:2878/3200 train_loss:3.3362 
train_time:418792ms step_avg:146.02ms step:2879/3200 train_loss:3.4529 train_time:418935ms step_avg:146.02ms step:2880/3200 train_loss:3.3996 train_time:419078ms step_avg:146.02ms step:2881/3200 train_loss:3.3527 train_time:419221ms step_avg:146.02ms step:2882/3200 train_loss:3.3401 train_time:419363ms step_avg:146.02ms step:2883/3200 train_loss:3.4609 train_time:419511ms step_avg:146.02ms step:2884/3200 train_loss:3.2376 train_time:419659ms step_avg:146.02ms step:2885/3200 train_loss:3.2636 train_time:419803ms step_avg:146.02ms step:2886/3200 train_loss:3.3108 train_time:419948ms step_avg:146.02ms step:2887/3200 train_loss:3.3052 train_time:420092ms step_avg:146.02ms step:2888/3200 train_loss:3.3077 train_time:420235ms step_avg:146.02ms step:2889/3200 train_loss:3.3420 train_time:420378ms step_avg:146.02ms step:2890/3200 train_loss:3.5288 train_time:420521ms step_avg:146.01ms step:2891/3200 train_loss:3.3640 train_time:420666ms step_avg:146.01ms step:2892/3200 train_loss:3.1966 train_time:420812ms step_avg:146.01ms step:2893/3200 train_loss:3.1302 train_time:420956ms step_avg:146.01ms step:2894/3200 train_loss:3.2726 train_time:421100ms step_avg:146.01ms step:2895/3200 train_loss:3.1575 train_time:421243ms step_avg:146.01ms step:2896/3200 train_loss:3.3371 train_time:421387ms step_avg:146.01ms step:2897/3200 train_loss:3.4639 train_time:421531ms step_avg:146.01ms step:2898/3200 train_loss:3.2910 train_time:421677ms step_avg:146.01ms step:2899/3200 train_loss:3.3894 train_time:421820ms step_avg:146.01ms step:2900/3200 train_loss:3.2640 train_time:421964ms step_avg:146.01ms step:2901/3200 train_loss:3.4588 train_time:422110ms step_avg:146.01ms step:2902/3200 train_loss:3.4456 train_time:422255ms step_avg:146.01ms step:2903/3200 train_loss:3.4703 train_time:422398ms step_avg:146.01ms step:2904/3200 train_loss:3.1973 train_time:422541ms step_avg:146.01ms step:2905/3200 train_loss:3.3445 train_time:422685ms step_avg:146.01ms step:2906/3200 train_loss:3.3176 
train_time:422832ms step_avg:146.01ms step:2907/3200 train_loss:3.3804 train_time:422977ms step_avg:146.01ms step:2908/3200 train_loss:3.3282 train_time:423120ms step_avg:146.00ms step:2909/3200 train_loss:3.3052 train_time:423263ms step_avg:146.00ms step:2910/3200 train_loss:3.6354 train_time:423407ms step_avg:146.00ms step:2911/3200 train_loss:3.3433 train_time:423554ms step_avg:146.00ms step:2912/3200 train_loss:3.2584 train_time:423698ms step_avg:146.00ms step:2913/3200 train_loss:3.2449 train_time:423841ms step_avg:146.00ms step:2914/3200 train_loss:3.7205 train_time:423985ms step_avg:146.00ms step:2915/3200 train_loss:3.3118 train_time:424130ms step_avg:146.00ms step:2916/3200 train_loss:3.2651 train_time:424276ms step_avg:146.00ms step:2917/3200 train_loss:3.2511 train_time:424420ms step_avg:146.00ms step:2918/3200 train_loss:3.5280 train_time:424564ms step_avg:146.00ms step:2919/3200 train_loss:3.0297 train_time:424710ms step_avg:146.00ms step:2920/3200 train_loss:3.2310 train_time:424857ms step_avg:146.00ms step:2921/3200 train_loss:3.2524 train_time:425000ms step_avg:146.00ms step:2922/3200 train_loss:3.3548 train_time:425143ms step_avg:146.00ms step:2923/3200 train_loss:3.3888 train_time:425286ms step_avg:146.00ms step:2924/3200 train_loss:3.4230 train_time:425430ms step_avg:146.00ms step:2925/3200 train_loss:3.4402 train_time:425576ms step_avg:146.00ms step:2926/3200 train_loss:3.3211 train_time:425719ms step_avg:145.99ms step:2927/3200 train_loss:3.3260 train_time:425863ms step_avg:145.99ms step:2928/3200 train_loss:3.3100 train_time:426009ms step_avg:145.99ms step:2929/3200 train_loss:3.3190 train_time:426154ms step_avg:145.99ms step:2930/3200 train_loss:3.2718 train_time:426297ms step_avg:145.99ms step:2931/3200 train_loss:3.3055 train_time:426440ms step_avg:145.99ms step:2932/3200 train_loss:3.4333 train_time:426584ms step_avg:145.99ms step:2933/3200 train_loss:3.4764 train_time:426728ms step_avg:145.99ms step:2934/3200 train_loss:3.4522 
train_time:426875ms step_avg:145.99ms step:2935/3200 train_loss:3.2928 train_time:427018ms step_avg:145.99ms step:2936/3200 train_loss:3.3408 train_time:427162ms step_avg:145.99ms step:2937/3200 train_loss:3.2910 train_time:427306ms step_avg:145.99ms step:2938/3200 train_loss:3.3112 train_time:427453ms step_avg:145.99ms step:2939/3200 train_loss:3.3367 train_time:427596ms step_avg:145.99ms step:2940/3200 train_loss:3.3745 train_time:427740ms step_avg:145.99ms step:2941/3200 train_loss:3.4184 train_time:427884ms step_avg:145.99ms step:2942/3200 train_loss:3.4108 train_time:428028ms step_avg:145.99ms step:2943/3200 train_loss:3.3398 train_time:428175ms step_avg:145.99ms step:2944/3200 train_loss:3.2152 train_time:428318ms step_avg:145.98ms step:2945/3200 train_loss:3.1623 train_time:428463ms step_avg:145.98ms step:2946/3200 train_loss:3.3650 train_time:428608ms step_avg:145.98ms step:2947/3200 train_loss:3.4310 train_time:428753ms step_avg:145.98ms step:2948/3200 train_loss:3.3584 train_time:428897ms step_avg:145.98ms step:2949/3200 train_loss:3.5515 train_time:429048ms step_avg:145.98ms step:2950/3200 train_loss:3.3626 train_time:429184ms step_avg:145.98ms step:2951/3200 train_loss:3.3674 train_time:429328ms step_avg:145.98ms step:2952/3200 train_loss:3.7647 train_time:429474ms step_avg:145.98ms step:2953/3200 train_loss:3.4384 train_time:429618ms step_avg:145.98ms step:2954/3200 train_loss:3.3863 train_time:429761ms step_avg:145.98ms step:2955/3200 train_loss:3.3993 train_time:429906ms step_avg:145.98ms step:2956/3200 train_loss:3.3306 train_time:430050ms step_avg:145.98ms step:2957/3200 train_loss:3.3535 train_time:430195ms step_avg:145.98ms step:2958/3200 train_loss:3.2325 train_time:430338ms step_avg:145.98ms step:2959/3200 train_loss:3.3122 train_time:430481ms step_avg:145.98ms step:2960/3200 train_loss:3.4552 train_time:430625ms step_avg:145.97ms step:2961/3200 train_loss:3.2609 train_time:430769ms step_avg:145.97ms step:2962/3200 train_loss:3.3885 
train_time:430915ms step_avg:145.97ms step:2963/3200 train_loss:3.2567 train_time:431059ms step_avg:145.97ms step:2964/3200 train_loss:3.3109 train_time:431203ms step_avg:145.97ms step:2965/3200 train_loss:3.2919 train_time:431348ms step_avg:145.97ms step:2966/3200 train_loss:3.3970 train_time:431493ms step_avg:145.97ms step:2967/3200 train_loss:3.2792 train_time:431637ms step_avg:145.97ms step:2968/3200 train_loss:3.5183 train_time:431781ms step_avg:145.97ms step:2969/3200 train_loss:3.3711 train_time:431924ms step_avg:145.97ms step:2970/3200 train_loss:3.3906 train_time:432069ms step_avg:145.97ms step:2971/3200 train_loss:3.3660 train_time:432213ms step_avg:145.97ms step:2972/3200 train_loss:3.4457 train_time:432357ms step_avg:145.97ms step:2973/3200 train_loss:3.2715 train_time:432501ms step_avg:145.97ms step:2974/3200 train_loss:3.2768 train_time:432645ms step_avg:145.97ms step:2975/3200 train_loss:3.2029 train_time:432791ms step_avg:145.97ms step:2976/3200 train_loss:3.2769 train_time:432935ms step_avg:145.97ms step:2977/3200 train_loss:3.2607 train_time:433080ms step_avg:145.97ms step:2978/3200 train_loss:3.2903 train_time:433223ms step_avg:145.96ms step:2979/3200 train_loss:3.5682 train_time:433368ms step_avg:145.96ms step:2980/3200 train_loss:3.3728 train_time:433514ms step_avg:145.96ms step:2981/3200 train_loss:3.4116 train_time:433658ms step_avg:145.96ms step:2982/3200 train_loss:3.4340 train_time:433802ms step_avg:145.96ms step:2983/3200 train_loss:3.5004 train_time:433947ms step_avg:145.96ms step:2984/3200 train_loss:3.3134 train_time:434090ms step_avg:145.96ms step:2985/3200 train_loss:3.4091 train_time:434234ms step_avg:145.96ms step:2986/3200 train_loss:3.4134 train_time:434379ms step_avg:145.96ms step:2987/3200 train_loss:3.3616 train_time:434522ms step_avg:145.96ms step:2988/3200 train_loss:3.4751 train_time:434666ms step_avg:145.96ms step:2989/3200 train_loss:3.0729 train_time:434812ms step_avg:145.96ms step:2990/3200 train_loss:3.4211 
train_time:434957ms step_avg:145.96ms step:2991/3200 train_loss:3.3776 train_time:435100ms step_avg:145.96ms step:2992/3200 train_loss:3.3307 train_time:435245ms step_avg:145.96ms step:2993/3200 train_loss:3.2722 train_time:435389ms step_avg:145.96ms step:2994/3200 train_loss:3.4080 train_time:435533ms step_avg:145.96ms step:2995/3200 train_loss:3.2361 train_time:435678ms step_avg:145.96ms step:2996/3200 train_loss:3.2477 train_time:435821ms step_avg:145.95ms step:2997/3200 train_loss:3.3257 train_time:435965ms step_avg:145.95ms step:2998/3200 train_loss:3.2653 train_time:436110ms step_avg:145.95ms step:2999/3200 train_loss:3.3920 train_time:436254ms step_avg:145.95ms step:3000/3200 train_loss:3.2906 train_time:436397ms step_avg:145.95ms step:3000/3200 val_loss:3.3263 train_time:436448ms step_avg:145.97ms step:3001/3200 train_loss:3.2862 train_time:436552ms step_avg:145.96ms step:3002/3200 train_loss:3.2344 train_time:436701ms step_avg:145.96ms step:3003/3200 train_loss:3.2671 train_time:436845ms step_avg:145.96ms step:3004/3200 train_loss:3.4029 train_time:436987ms step_avg:145.95ms step:3005/3200 train_loss:3.7350 train_time:437130ms step_avg:145.95ms step:3006/3200 train_loss:3.3036 train_time:437273ms step_avg:145.95ms step:3007/3200 train_loss:3.3774 train_time:437417ms step_avg:145.95ms step:3008/3200 train_loss:3.1858 train_time:437563ms step_avg:145.95ms step:3009/3200 train_loss:3.4079 train_time:437710ms step_avg:145.95ms step:3010/3200 train_loss:3.3072 train_time:437855ms step_avg:145.95ms step:3011/3200 train_loss:3.3668 train_time:437999ms step_avg:145.95ms step:3012/3200 train_loss:3.3592 train_time:438142ms step_avg:145.95ms step:3013/3200 train_loss:3.2400 train_time:438285ms step_avg:145.95ms step:3014/3200 train_loss:3.4443 train_time:438427ms step_avg:145.95ms step:3015/3200 train_loss:3.4046 train_time:438572ms step_avg:145.95ms step:3016/3200 train_loss:3.2706 train_time:438719ms step_avg:145.95ms step:3017/3200 train_loss:3.3154 
train_time:438865ms step_avg:145.95ms step:3018/3200 train_loss:3.3582 train_time:439008ms step_avg:145.95ms step:3019/3200 train_loss:3.3968 train_time:439151ms step_avg:145.95ms step:3020/3200 train_loss:3.1800 train_time:439294ms step_avg:145.94ms step:3021/3200 train_loss:3.4788 train_time:439439ms step_avg:145.94ms step:3022/3200 train_loss:3.3078 train_time:439583ms step_avg:145.94ms step:3023/3200 train_loss:3.2265 train_time:439729ms step_avg:145.94ms step:3024/3200 train_loss:3.3307 train_time:440031ms step_avg:146.00ms step:3025/3200 train_loss:3.3008 train_time:440179ms step_avg:146.00ms step:3026/3200 train_loss:3.3682 train_time:440323ms step_avg:146.00ms step:3027/3200 train_loss:3.3880 train_time:440466ms step_avg:145.99ms step:3028/3200 train_loss:3.2895 train_time:440609ms step_avg:145.99ms step:3029/3200 train_loss:3.0963 train_time:440751ms step_avg:145.99ms step:3030/3200 train_loss:3.4367 train_time:440894ms step_avg:145.99ms step:3031/3200 train_loss:3.1979 train_time:441043ms step_avg:145.99ms step:3032/3200 train_loss:3.1905 train_time:441188ms step_avg:145.99ms step:3033/3200 train_loss:3.5300 train_time:441332ms step_avg:145.99ms step:3034/3200 train_loss:3.5307 train_time:441477ms step_avg:145.99ms step:3035/3200 train_loss:3.2927 train_time:441622ms step_avg:145.99ms step:3036/3200 train_loss:3.3753 train_time:441764ms step_avg:145.99ms step:3037/3200 train_loss:3.3204 train_time:441908ms step_avg:145.99ms step:3038/3200 train_loss:3.2210 train_time:442052ms step_avg:145.99ms step:3039/3200 train_loss:3.2702 train_time:442198ms step_avg:145.99ms step:3040/3200 train_loss:3.3736 train_time:442508ms step_avg:146.04ms step:3041/3200 train_loss:3.3568 train_time:442651ms step_avg:146.04ms step:3042/3200 train_loss:3.1570 train_time:442795ms step_avg:146.04ms step:3043/3200 train_loss:3.3172 train_time:442938ms step_avg:146.04ms step:3044/3200 train_loss:3.3461 train_time:443081ms step_avg:146.04ms step:3045/3200 train_loss:3.3528 
train_time:443224ms step_avg:146.04ms step:3046/3200 train_loss:3.4258 train_time:443368ms step_avg:146.04ms step:3047/3200 train_loss:3.2490 train_time:443518ms step_avg:146.04ms step:3048/3200 train_loss:3.3817 train_time:443665ms step_avg:146.04ms step:3049/3200 train_loss:3.3212 train_time:443808ms step_avg:146.04ms step:3050/3200 train_loss:3.2444 train_time:443950ms step_avg:146.04ms step:3051/3200 train_loss:3.3751 train_time:444094ms step_avg:146.04ms step:3052/3200 train_loss:3.2175 train_time:444237ms step_avg:146.03ms step:3053/3200 train_loss:3.4586 train_time:444383ms step_avg:146.03ms step:3054/3200 train_loss:3.4108 train_time:444530ms step_avg:146.03ms step:3055/3200 train_loss:3.3825 train_time:444676ms step_avg:146.03ms step:3056/3200 train_loss:3.3938 train_time:444822ms step_avg:146.03ms step:3057/3200 train_loss:3.2654 train_time:444966ms step_avg:146.03ms step:3058/3200 train_loss:3.2933 train_time:445109ms step_avg:146.03ms step:3059/3200 train_loss:3.3656 train_time:445251ms step_avg:146.03ms step:3060/3200 train_loss:3.2816 train_time:445397ms step_avg:146.03ms step:3061/3200 train_loss:3.3343 train_time:445544ms step_avg:146.03ms step:3062/3200 train_loss:3.3393 train_time:445689ms step_avg:146.03ms step:3063/3200 train_loss:3.2771 train_time:445833ms step_avg:146.03ms step:3064/3200 train_loss:3.2499 train_time:445978ms step_avg:146.03ms step:3065/3200 train_loss:3.2649 train_time:446121ms step_avg:146.03ms step:3066/3200 train_loss:3.2440 train_time:446264ms step_avg:146.03ms step:3067/3200 train_loss:3.2334 train_time:446408ms step_avg:146.03ms step:3068/3200 train_loss:3.1963 train_time:446552ms step_avg:146.03ms step:3069/3200 train_loss:3.2367 train_time:446698ms step_avg:146.03ms step:3070/3200 train_loss:3.2255 train_time:446844ms step_avg:146.03ms step:3071/3200 train_loss:3.4150 train_time:446988ms step_avg:146.03ms step:3072/3200 train_loss:3.3470 train_time:447131ms step_avg:146.03ms step:3073/3200 train_loss:3.3875 
train_time:447274ms step_avg:146.02ms step:3074/3200 train_loss:3.3746 train_time:447419ms step_avg:146.02ms step:3075/3200 train_loss:3.3231 train_time:447564ms step_avg:146.02ms step:3076/3200 train_loss:3.3749 train_time:447708ms step_avg:146.02ms step:3077/3200 train_loss:3.4283 train_time:447853ms step_avg:146.02ms step:3078/3200 train_loss:3.2318 train_time:447997ms step_avg:146.02ms step:3079/3200 train_loss:3.7604 train_time:448142ms step_avg:146.02ms step:3080/3200 train_loss:3.3173 train_time:448285ms step_avg:146.02ms step:3081/3200 train_loss:3.2794 train_time:448429ms step_avg:146.02ms step:3082/3200 train_loss:3.4297 train_time:448572ms step_avg:146.02ms step:3083/3200 train_loss:3.2365 train_time:448719ms step_avg:146.02ms step:3084/3200 train_loss:3.2671 train_time:448864ms step_avg:146.02ms step:3085/3200 train_loss:3.3214 train_time:449007ms step_avg:146.02ms step:3086/3200 train_loss:3.4168 train_time:449151ms step_avg:146.02ms step:3087/3200 train_loss:3.3249 train_time:449295ms step_avg:146.02ms step:3088/3200 train_loss:3.2328 train_time:449440ms step_avg:146.02ms step:3089/3200 train_loss:3.3878 train_time:449583ms step_avg:146.02ms step:3090/3200 train_loss:3.2500 train_time:449728ms step_avg:146.02ms step:3091/3200 train_loss:3.5119 train_time:449873ms step_avg:146.02ms step:3092/3200 train_loss:4.0800 train_time:450018ms step_avg:146.02ms step:3093/3200 train_loss:3.3489 train_time:450164ms step_avg:146.01ms step:3094/3200 train_loss:3.2341 train_time:450308ms step_avg:146.01ms step:3095/3200 train_loss:3.1971 train_time:450451ms step_avg:146.01ms step:3096/3200 train_loss:3.3615 train_time:450597ms step_avg:146.01ms step:3097/3200 train_loss:3.4967 train_time:450743ms step_avg:146.01ms step:3098/3200 train_loss:3.2625 train_time:450887ms step_avg:146.01ms step:3099/3200 train_loss:3.3001 train_time:451031ms step_avg:146.01ms step:3100/3200 train_loss:3.4736 train_time:451175ms step_avg:146.01ms step:3101/3200 train_loss:3.3783 
train_time:451320ms step_avg:146.01ms step:3102/3200 train_loss:3.3705 train_time:451465ms step_avg:146.01ms step:3103/3200 train_loss:3.2816 train_time:451608ms step_avg:146.01ms step:3104/3200 train_loss:3.5368 train_time:451753ms step_avg:146.01ms step:3105/3200 train_loss:3.3614 train_time:451901ms step_avg:146.01ms step:3106/3200 train_loss:3.2154 train_time:452046ms step_avg:146.01ms step:3107/3200 train_loss:3.2419 train_time:452189ms step_avg:146.01ms step:3108/3200 train_loss:3.1958 train_time:452332ms step_avg:146.01ms step:3109/3200 train_loss:3.4189 train_time:452478ms step_avg:146.01ms step:3110/3200 train_loss:3.3082 train_time:452622ms step_avg:146.01ms step:3111/3200 train_loss:3.3398 train_time:452766ms step_avg:146.01ms step:3112/3200 train_loss:3.3173 train_time:452910ms step_avg:146.01ms step:3113/3200 train_loss:3.3742 train_time:453053ms step_avg:146.00ms step:3114/3200 train_loss:3.3290 train_time:453198ms step_avg:146.00ms step:3115/3200 train_loss:3.3300 train_time:453344ms step_avg:146.00ms step:3116/3200 train_loss:3.3678 train_time:453487ms step_avg:146.00ms step:3117/3200 train_loss:3.2148 train_time:453631ms step_avg:146.00ms step:3118/3200 train_loss:3.2434 train_time:453777ms step_avg:146.00ms step:3119/3200 train_loss:3.4197 train_time:453923ms step_avg:146.00ms step:3120/3200 train_loss:3.4004 train_time:454067ms step_avg:146.00ms step:3121/3200 train_loss:3.1865 train_time:454212ms step_avg:146.00ms step:3122/3200 train_loss:3.3840 train_time:454356ms step_avg:146.00ms step:3123/3200 train_loss:3.4399 train_time:454501ms step_avg:146.00ms step:3124/3200 train_loss:3.4095 train_time:454644ms step_avg:146.00ms step:3125/3200 train_loss:3.2043 train_time:454788ms step_avg:146.00ms step:3125/3200 val_loss:3.3110 train_time:454839ms step_avg:146.02ms step:3126/3200 train_loss:3.2907 train_time:454941ms step_avg:146.00ms step:3127/3200 train_loss:3.3168 train_time:455089ms step_avg:146.00ms step:3128/3200 train_loss:3.4064 
train_time:455233ms step_avg:146.00ms step:3129/3200 train_loss:3.4833 train_time:455376ms step_avg:146.00ms step:3130/3200 train_loss:3.1798 train_time:455519ms step_avg:146.00ms step:3131/3200 train_loss:3.3529 train_time:455661ms step_avg:146.00ms step:3132/3200 train_loss:3.3473 train_time:455803ms step_avg:146.00ms step:3133/3200 train_loss:3.3727 train_time:455950ms step_avg:146.00ms step:3134/3200 train_loss:3.2593 train_time:456097ms step_avg:146.00ms step:3135/3200 train_loss:3.3858 train_time:456243ms step_avg:146.00ms step:3136/3200 train_loss:3.2924 train_time:456386ms step_avg:146.00ms step:3137/3200 train_loss:3.3648 train_time:456529ms step_avg:146.00ms step:3138/3200 train_loss:3.5485 train_time:456672ms step_avg:145.99ms step:3139/3200 train_loss:3.5313 train_time:456817ms step_avg:145.99ms step:3140/3200 train_loss:3.2957 train_time:456964ms step_avg:145.99ms step:3141/3200 train_loss:3.3167 train_time:457109ms step_avg:145.99ms step:3142/3200 train_loss:3.2374 train_time:457253ms step_avg:145.99ms step:3143/3200 train_loss:3.3311 train_time:457399ms step_avg:145.99ms step:3144/3200 train_loss:3.1287 train_time:457543ms step_avg:145.99ms step:3145/3200 train_loss:3.3670 train_time:457686ms step_avg:145.99ms step:3146/3200 train_loss:3.2786 train_time:457830ms step_avg:145.99ms step:3147/3200 train_loss:3.2994 train_time:457976ms step_avg:145.99ms step:3148/3200 train_loss:3.4741 train_time:458122ms step_avg:145.99ms step:3149/3200 train_loss:3.5520 train_time:458266ms step_avg:145.99ms step:3150/3200 train_loss:3.4298 train_time:458410ms step_avg:145.99ms step:3151/3200 train_loss:3.2368 train_time:458554ms step_avg:145.99ms step:3152/3200 train_loss:3.2920 train_time:458700ms step_avg:145.99ms step:3153/3200 train_loss:3.2626 train_time:458844ms step_avg:145.99ms step:3154/3200 train_loss:3.3875 train_time:458988ms step_avg:145.99ms step:3155/3200 train_loss:3.2027 train_time:459132ms step_avg:145.99ms step:3156/3200 train_loss:3.3298 
train_time:459278ms step_avg:145.99ms step:3157/3200 train_loss:3.2775 train_time:459424ms step_avg:145.99ms step:3158/3200 train_loss:3.4089 train_time:459567ms step_avg:145.99ms step:3159/3200 train_loss:3.4571 train_time:459711ms step_avg:145.99ms step:3160/3200 train_loss:3.3141 train_time:459855ms step_avg:145.99ms step:3161/3200 train_loss:3.3718 train_time:460001ms step_avg:145.99ms step:3162/3200 train_loss:3.4535 train_time:460145ms step_avg:145.98ms step:3163/3200 train_loss:3.3591 train_time:460289ms step_avg:145.98ms step:3164/3200 train_loss:3.4050 train_time:460433ms step_avg:145.98ms step:3165/3200 train_loss:3.2359 train_time:460577ms step_avg:145.98ms step:3166/3200 train_loss:3.2175 train_time:460721ms step_avg:145.98ms step:3167/3200 train_loss:3.2568 train_time:460865ms step_avg:145.98ms step:3168/3200 train_loss:3.0718 train_time:461008ms step_avg:145.98ms step:3169/3200 train_loss:3.2489 train_time:461152ms step_avg:145.98ms step:3170/3200 train_loss:3.3827 train_time:461298ms step_avg:145.98ms step:3171/3200 train_loss:3.4036 train_time:461443ms step_avg:145.98ms step:3172/3200 train_loss:3.3806 train_time:461587ms step_avg:145.98ms step:3173/3200 train_loss:3.3525 train_time:461730ms step_avg:145.98ms step:3174/3200 train_loss:3.3194 train_time:461874ms step_avg:145.98ms step:3175/3200 train_loss:3.3173 train_time:462020ms step_avg:145.98ms step:3176/3200 train_loss:3.3097 train_time:462164ms step_avg:145.98ms step:3177/3200 train_loss:3.2487 train_time:462308ms step_avg:145.98ms step:3178/3200 train_loss:3.3705 train_time:462451ms step_avg:145.98ms step:3179/3200 train_loss:3.4609 train_time:462597ms step_avg:145.98ms step:3180/3200 train_loss:3.2925 train_time:462743ms step_avg:145.98ms step:3181/3200 train_loss:3.2871 train_time:462885ms step_avg:145.97ms step:3182/3200 train_loss:3.3272 train_time:463028ms step_avg:145.97ms step:3183/3200 train_loss:3.4265 train_time:463173ms step_avg:145.97ms step:3184/3200 train_loss:3.4448 
train_time:463319ms step_avg:145.97ms step:3185/3200 train_loss:3.3397 train_time:463463ms step_avg:145.97ms step:3186/3200 train_loss:3.4127 train_time:463606ms step_avg:145.97ms step:3187/3200 train_loss:3.3987 train_time:463750ms step_avg:145.97ms step:3188/3200 train_loss:3.1972 train_time:463896ms step_avg:145.97ms step:3189/3200 train_loss:3.2888 train_time:464041ms step_avg:145.97ms step:3190/3200 train_loss:3.3106 train_time:464185ms step_avg:145.97ms step:3191/3200 train_loss:3.3372 train_time:464328ms step_avg:145.97ms step:3192/3200 train_loss:3.2962 train_time:464473ms step_avg:145.97ms step:3193/3200 train_loss:3.2253 train_time:464617ms step_avg:145.97ms step:3194/3200 train_loss:4.2465 train_time:464762ms step_avg:145.97ms step:3195/3200 train_loss:3.3328 train_time:464906ms step_avg:145.97ms step:3196/3200 train_loss:3.1350 train_time:465050ms step_avg:145.97ms step:3197/3200 train_loss:3.2775 train_time:465194ms step_avg:145.97ms step:3198/3200 train_loss:3.1523 train_time:465339ms step_avg:145.97ms step:3199/3200 train_loss:3.2745 train_time:465484ms step_avg:145.97ms step:3200/3200 train_loss:3.2148 train_time:465628ms step_avg:145.96ms step:3200/3200 val_loss:3.3065 train_time:465680ms step_avg:145.98ms