==================================================================================================== import os import sys with open(sys.argv[0]) as f: code = f.read() # read the code of this file ASAP, for logging import uuid import glob import time from dataclasses import dataclass import numpy as np import torch from torch import nn import torch.nn.functional as F import torch.distributed as dist import torch._inductor.config as config from torch.nn.parallel import DistributedDataParallel as DDP # ----------------------------------------------------------------------------- # Muon optimizer def zeropower_via_svd(G, steps=None): U, S, V = G.svd() return U @ V.T @torch.compile def zeropower_via_newtonschulz5(G, steps=10, eps=1e-7): """ Newton-Schulz iteration to compute the zeroth power / orthogonalization of G. We opt to use a quintic iteration whose coefficients are selected to maximize the slope at zero. For the purpose of minimizing steps, it turns out to be empirically effective to keep increasing the slope at zero even beyond the point where the iteration no longer converges all the way to one everywhere on the interval. This iteration therefore does not produce UV^T but rather something like US'V^T where S' is diagonal with S_{ii}' \sim Uniform(0.5, 1.5), which turns out not to hurt model performance at all relative to UV^T, where USV^T = G is the SVD. """ assert len(G.shape) == 2 a, b, c = (3.4445, -4.7750, 2.0315) X = G.bfloat16() X /= (X.norm() + eps) # ensure top singular value <= 1 if G.size(0) > G.size(1): X = X.T for _ in range(steps): A = X @ X.T B = A @ X X = a * X + b * B + c * A @ B if G.size(0) > G.size(1): X = X.T return X zeropower_backends = dict(svd=zeropower_via_svd, newtonschulz5=zeropower_via_newtonschulz5) class Muon(torch.optim.Optimizer): """ Muon - MomentUm Orthogonalized by Newton-schulz Muon internally runs standard SGD-momentum, and then performs an orthogonalization post- processing step, in which each 2D parameter's update is replaced with the nearest orthogonal matrix. To efficiently orthogonalize each update, we use a Newton-Schulz iteration, which has the advantage that it can be stably run in bfloat16 on the GPU. Some warnings: - This optimizer assumes that all parameters passed in are 2D. - It should not be used for the embedding layer, the final fully connected layer, or any {0,1}-D parameters; those should all be optimized by a standard method (e.g., AdamW). - To use it with 4D convolutional filters, it works well to just flatten their last 3 dimensions. - We believe it is unlikely to work well for training with small batch size. - We believe it may not work well for finetuning pretrained models, but we haven't tested this. - We have not yet tried this optimizer for training scenarios larger than NanoGPT (124M). Arguments: lr: The learning rate used by the internal SGD. momentum: The momentum used by the internal SGD. nesterov: Whether to use Nesterov-style momentum in the internal SGD. (recommended) backend: The chosen backend for the orthogonalization step. (recommended: 'newtonschulz5') backend_steps: The number of iteration steps to use in the backend, if it is iterative. """ def __init__(self, params, lr=0.02, momentum=0.95, nesterov=True, backend='newtonschulz5', backend_steps=5): defaults = dict(lr=lr, momentum=momentum, nesterov=nesterov, backend=backend, backend_steps=backend_steps) super().__init__(params, defaults) def step(self): for group in self.param_groups: lr = group['lr'] momentum = group['momentum'] zeropower_backend = zeropower_backends[group['backend']] # generate weight updates in distributed fashion total_params = sum(p.numel() for p in group['params']) updates_flat = torch.zeros(total_params, device='cuda', dtype=torch.bfloat16) curr_idx = 0 for i, p in enumerate(group['params']): # luckily this will perfectly distribute a transformer with multiple of 4 layers to 8 GPUs if i % int(os.environ['WORLD_SIZE']) == int(os.environ['RANK']): g = p.grad assert g is not None state = self.state[p] if 'momentum_buffer' not in state: state['momentum_buffer'] = torch.zeros_like(g) buf = state['momentum_buffer'] buf.mul_(momentum).add_(g) if group['nesterov']: g = g.add(buf, alpha=momentum) g = zeropower_backend(g, steps=group['backend_steps']) g *= max(1, g.size(0)/g.size(1))**0.5 updates_flat[curr_idx:curr_idx+p.numel()] = g.flatten() curr_idx += p.numel() # sync updates across devices. we are not memory-constrained so can do this simple deserialization dist.all_reduce(updates_flat, op=dist.ReduceOp.SUM) # deserialize and apply updates curr_idx = 0 for p in group['params']: g = updates_flat[curr_idx:curr_idx+p.numel()].view_as(p.data).type_as(p.data) p.data.add_(g, alpha=-lr) curr_idx += p.numel() # ----------------------------------------------------------------------------- # PyTorch nn.Module definitions for the GPT-2 model class Rotary(torch.nn.Module): def __init__(self, dim, base=10000): super().__init__() self.inv_freq = 1.0 / (base ** (torch.arange(0, dim, 2).float() / dim)) self.seq_len_cached = None self.cos_cached = None self.sin_cached = None def forward(self, x): seq_len = x.shape[1] if seq_len != self.seq_len_cached: self.seq_len_cached = seq_len t = torch.arange(seq_len, device=x.device).type_as(self.inv_freq) freqs = torch.outer(t, self.inv_freq).to(x.device) self.cos_cached = freqs.cos().bfloat16() self.sin_cached = freqs.sin().bfloat16() return self.cos_cached[None, :, None, :], self.sin_cached[None, :, None, :] def apply_rotary_emb(x, cos, sin): assert x.ndim == 4 # multihead attention d = x.shape[3]//2 x1 = x[..., :d] x2 = x[..., d:] y1 = x1 * cos + x2 * sin y2 = x1 * (-sin) + x2 * cos return torch.cat([y1, y2], 3).type_as(x) class CausalSelfAttention(nn.Module): def __init__(self, config): super().__init__() self.n_head = config.n_head self.n_embd = config.n_embd self.head_dim = self.n_embd // self.n_head assert self.n_embd % self.n_head == 0 self.c_q = nn.Linear(self.n_embd, self.n_embd, bias=False) self.c_k = nn.Linear(self.n_embd, self.n_embd, bias=False) self.c_v = nn.Linear(self.n_embd, self.n_embd, bias=False) # output projection self.c_proj = nn.Linear(self.n_embd, self.n_embd, bias=False) self.c_proj.weight.data.zero_() # zero init suggested by @Grad62304977 self.rotary = Rotary(self.head_dim) def forward(self, x): B, T, C = x.size() # batch size, sequence length, embedding dimensionality (n_embd) q = self.c_q(x).view(B, T, self.n_head, self.head_dim) k = self.c_k(x).view(B, T, self.n_head, self.head_dim) v = self.c_v(x).view(B, T, self.n_head, self.head_dim) cos, sin = self.rotary(q) q, k = F.rms_norm(q, (q.size(-1),)), F.rms_norm(k, (k.size(-1),)) # QK norm suggested by @Grad62304977 q, k = apply_rotary_emb(q, cos, sin), apply_rotary_emb(k, cos, sin) y = F.scaled_dot_product_attention(q.transpose(1, 2), k.transpose(1, 2), v.transpose(1, 2), is_causal=True) y = y.transpose(1, 2).contiguous().view_as(x) # re-assemble all head outputs side by side y = self.c_proj(y) return y class MLP(nn.Module): def __init__(self, config): super().__init__() self.c_fc = nn.Linear(config.n_embd, 4 * config.n_embd, bias=False) self.c_proj = nn.Linear(4 * config.n_embd, config.n_embd, bias=False) self.c_proj.weight.data.zero_() # zero init suggested by @Grad62304977 def forward(self, x): x = self.c_fc(x) x = F.relu(x).square() # https://arxiv.org/abs/2109.08668v2; ~1-2% better than GELU; suggested by @SKYLINEZ007 and @Grad62304977 x = self.c_proj(x) return x class Block(nn.Module): def __init__(self, config): super().__init__() self.attn = CausalSelfAttention(config) self.mlp = MLP(config) def forward(self, x): x = x + self.attn(F.rms_norm(x, (x.size(-1),))) x = x + self.mlp(F.rms_norm(x, (x.size(-1),))) return x # ----------------------------------------------------------------------------- # The main GPT-2 model @dataclass class GPTConfig: vocab_size : int = 50304 n_layer : int = 12 n_head : int = 6 # head dim 128 suggested by @Grad62304977 n_embd : int = 768 class GPT(nn.Module): def __init__(self, config): super().__init__() self.config = config self.transformer = nn.ModuleDict(dict( wte = nn.Embedding(config.vocab_size, config.n_embd), h = nn.ModuleList([Block(config) for _ in range(config.n_layer)]), )) self.lm_head = nn.Linear(config.n_embd, config.vocab_size, bias=False) self.lm_head.weight.data.zero_() # @Grad62304977 def forward(self, idx, targets=None, return_logits=True): # forward the GPT model itself x = self.transformer.wte(idx) # token embeddings of shape (b, t, n_embd) x = F.rms_norm(x, (x.size(-1),)) # @Grad62304977 for block in self.transformer.h: x = block(x) x = F.rms_norm(x, (x.size(-1),)) if targets is not None: # if we are given some desired targets also calculate the loss logits = self.lm_head(x) logits = logits.float() # use tf32/fp32 for logits loss = F.cross_entropy(logits.view(-1, logits.size(-1)), targets.view(-1), ignore_index=-1) else: # inference-time mini-optimization: only forward the lm_head on the very last position logits = self.lm_head(x[:, [-1], :]) # note: using list [-1] to preserve the time dim logits = logits.float() # use tf32/fp32 for logits loss = None # there are performance reasons why not returning logits is prudent, if not needed if not return_logits: logits = None return logits, loss # ----------------------------------------------------------------------------- # Our own simple Distributed Data Loader def _peek_data_shard(filename): # only reads the header, returns header data with open(filename, "rb") as f: # first read the header, which is 256 int32 integers (4 bytes each) header = np.frombuffer(f.read(256*4), dtype=np.int32) if header[0] != 20240520: print("ERROR: magic number mismatch in the data .bin file!") print("---> HINT: Are you passing in a correct file with --input_bin?") print("---> HINT: Dataset encoding changed recently, re-run data prepro or refer again to README") print("---> HINT: For example re-run: `python dev/data/tinyshakespeare.py`, then re-try") exit(1) assert header[1] == 1, "unsupported version" ntok = header[2] # number of tokens (claimed) return ntok # for now just return the number of tokens def _load_data_shard(filename): with open(filename, "rb") as f: # first read the header, which is 256 int32 integers (4 bytes each) header = np.frombuffer(f.read(256*4), dtype=np.int32) assert header[0] == 20240520, "magic number mismatch in the data .bin file" assert header[1] == 1, "unsupported version" ntok = header[2] # number of tokens (claimed) # the rest of it are tokens, stored as uint16 tokens = np.frombuffer(f.read(), dtype=np.uint16) assert len(tokens) == ntok, "number of tokens read does not match header?" return tokens class DistributedDataLoader: def __init__(self, filename_pattern, B, T, process_rank, num_processes): self.process_rank = process_rank self.num_processes = num_processes self.B = B self.T = T # glob files that match the pattern self.files = sorted(glob.glob(filename_pattern)) assert len(self.files) > 0, f"did not find any files that match the pattern {filename_pattern}" # load and validate all data shards, count number of tokens in total ntok_total = 0 for fname in self.files: shard_ntok = _peek_data_shard(fname) assert shard_ntok >= num_processes * B * T + 1 ntok_total += int(shard_ntok) self.ntok_total = ntok_total # kick things off self.reset() def reset(self): self.current_shard = 0 self.current_position = self.process_rank * self.B * self.T self.tokens = _load_data_shard(self.files[self.current_shard]) def advance(self): # advance to next data shard self.current_shard = (self.current_shard + 1) % len(self.files) self.current_position = self.process_rank * self.B * self.T self.tokens = _load_data_shard(self.files[self.current_shard]) def next_batch(self): B = self.B T = self.T buf = self.tokens[self.current_position : self.current_position+B*T+1] buf = torch.tensor(buf.astype(np.int32), dtype=torch.long) x = (buf[:-1]).view(B, T) # inputs y = (buf[1:]).view(B, T) # targets # advance current position and load next shard if necessary self.current_position += B * T * self.num_processes if self.current_position + (B * T * self.num_processes + 1) > len(self.tokens): self.advance() return x.cuda(), y.cuda() # ----------------------------------------------------------------------------- # int main @dataclass class Hyperparameters: # data hyperparams input_bin : str = 'data/fineweb10B/fineweb_train_*.bin' # input .bin to train on input_val_bin : str = 'data/fineweb10B/fineweb_val_*.bin' # input .bin to eval validation loss on # optimization hyperparams batch_size : int = 8*64 # batch size, in sequences, across all devices device_batch_size : int = 64 # batch size, in sequences, per device sequence_length : int = 1024 # sequence length, in tokens num_iterations : int = 3200 # number of iterations to run warmup_iters : int = 0 warmdown_iters : int = 914 # number of iterations of linear warmup/warmdown for triangular or trapezoidal schedule weight_decay : float = 0 # evaluation and logging hyperparams val_loss_every : int = 125 # every how many steps to evaluate val loss? 0 for only at the end val_tokens : int = 10485760 # how many tokens of validation data? it's important to keep this fixed for consistent comparisons save_every : int = 0 # every how many steps to save the checkpoint? 0 for only at the end args = Hyperparameters() # set up DDP (distributed data parallel). torchrun sets this env variable assert torch.cuda.is_available() dist.init_process_group(backend='nccl') ddp_rank = int(os.environ['RANK']) ddp_local_rank = int(os.environ['LOCAL_RANK']) ddp_world_size = int(os.environ['WORLD_SIZE']) device = f'cuda:{ddp_local_rank}' torch.cuda.set_device(device) print(f"using device: {device}") master_process = (ddp_rank == 0) # this process will do logging, checkpointing etc. # convenience variables B, T = args.device_batch_size, args.sequence_length # calculate the number of steps to take in the val loop. assert args.val_tokens % (B * T * ddp_world_size) == 0 val_steps = args.val_tokens // (B * T * ddp_world_size) # calculate the steps of gradient accumulation required to attain the desired global batch size. assert args.batch_size % (B * ddp_world_size) == 0 train_accumulation_steps = args.batch_size // (B * ddp_world_size) # load tokens train_loader = DistributedDataLoader(args.input_bin, B, T, ddp_rank, ddp_world_size) val_loader = DistributedDataLoader(args.input_val_bin, B, T, ddp_rank, ddp_world_size) if master_process: print(f"Training DataLoader: total number of tokens: {train_loader.ntok_total} across {len(train_loader.files)} files") print(f"Validation DataLoader: total number of tokens: {val_loader.ntok_total} across {len(val_loader.files)} files") x, y = train_loader.next_batch() # there are only 50257 unique GPT-2 tokens; we extend to nearest multiple of 128 for efficiency. suggested to me by @Grad62304977. # this originates from Karpathy's experiments. num_vocab = 50304 model = GPT(GPTConfig(vocab_size=num_vocab, n_layer=12, n_head=6, n_embd=768)) model = model.cuda() if hasattr(config, "coordinate_descent_tuning"): config.coordinate_descent_tuning = True # suggested by @Chillee model = torch.compile(model) # here we wrap model into DDP container model = DDP(model, device_ids=[ddp_local_rank]) raw_model = model.module # always contains the "raw" unwrapped model ctx = torch.amp.autocast(device_type='cuda', dtype=torch.bfloat16) # CUDNN attention is ~4ms faster than Flash, but doesn't get selected by default in PyTorch 2.5.1 from torch.backends.cuda import enable_cudnn_sdp, enable_flash_sdp, enable_math_sdp, enable_mem_efficient_sdp enable_cudnn_sdp(True) enable_flash_sdp(False) enable_mem_efficient_sdp(False) enable_math_sdp(False) # init the optimizer(s) optimizer1 = torch.optim.Adam([raw_model.transformer.wte.weight], lr=0.3, betas=(0.9, 0.95), fused=True) optimizer2 = torch.optim.Adam([raw_model.lm_head.weight], lr=0.002, betas=(0.9, 0.95), fused=True) optimizer3 = Muon(raw_model.transformer.h.parameters(), lr=0.02, momentum=0.95) optimizers = [optimizer1, optimizer2, optimizer3] # learning rate decay scheduler (linear warmup and warmdown) def get_lr(it): assert it <= args.num_iterations # 1) linear warmup for warmup_iters steps if it < args.warmup_iters: return (it+1) / args.warmup_iters # 2) constant lr for a while elif it < args.num_iterations - args.warmdown_iters: return 1.0 # 3) linear warmdown else: decay_ratio = (args.num_iterations - it) / args.warmdown_iters return decay_ratio schedulers = [torch.optim.lr_scheduler.LambdaLR(opt, get_lr) for opt in optimizers] # begin logging if master_process: run_id = str(uuid.uuid4()) logdir = 'logs/%s/' % run_id os.makedirs(logdir, exist_ok=True) logfile = 'logs/%s.txt' % run_id # create the log file with open(logfile, "w") as f: # begin the log by printing this file (the Python code) f.write('='*100 + '\n') f.write(code) f.write('='*100 + '\n') # log information about the hardware/software environment this is running on # and print the full `nvidia-smi` to file f.write(f"Running pytorch {torch.version.__version__} compiled for CUDA {torch.version.cuda}\nnvidia-smi:\n") import subprocess result = subprocess.run(['nvidia-smi'], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) f.write(f'{result.stdout}\n') f.write('='*100 + '\n') training_time_ms = 0 # start the clock torch.cuda.synchronize() t0 = time.time() # begin training train_loader.reset() for step in range(args.num_iterations + 1): last_step = (step == args.num_iterations) # This effectively ignores timing first 10 steps, which are slower for weird reasons. # Alternately, and slightly more correctly in terms of benchmarking, we could do 10 # steps with dummy data first, and then re-initialize the model and reset the loader. if step == 10: training_time_ms = 0 t0 = time.time() timed_steps = float('nan') if step <= 11 else (step - 10) + 1 # <= 11 to avoid bug in val # once in a while evaluate the validation dataset if (last_step or (args.val_loss_every > 0 and step % args.val_loss_every == 0)): # stop the clock torch.cuda.synchronize() training_time_ms += 1000 * (time.time() - t0) # run validation batches model.eval() val_loader.reset() val_loss = 0.0 for _ in range(val_steps): x_val, y_val = val_loader.next_batch() with ctx: # of course, we'd like to use no_grad() here too, but that creates a torch.compile error for some reason _, loss = model(x_val, y_val, return_logits=False) val_loss += loss.detach() del loss dist.all_reduce(val_loss, op=dist.ReduceOp.AVG) val_loss /= val_steps # log val loss to console and to logfile if master_process: print(f'step:{step}/{args.num_iterations} val_loss:{val_loss:.4f} train_time:{training_time_ms:.0f}ms step_avg:{training_time_ms/(timed_steps-1):.2f}ms') with open(logfile, "a") as f: f.write(f'step:{step}/{args.num_iterations} val_loss:{val_loss:.4f} train_time:{training_time_ms:.0f}ms step_avg:{training_time_ms/(timed_steps-1):.2f}ms\n') # start the clock again torch.cuda.synchronize() t0 = time.time() if master_process and (last_step or (args.save_every > 0 and step % args.save_every == 0)): # stop the clock torch.cuda.synchronize() training_time_ms += 1000 * (time.time() - t0) # save the state of the training process log = dict(step=step, code=code, model=raw_model.state_dict(), optimizers=[opt.state_dict() for opt in optimizers]) torch.save(log, 'logs/%s/state_step%06d.pt' % (run_id, step)) # start the clock again torch.cuda.synchronize() t0 = time.time() # bit confusing: we want to make sure to eval on 0th iteration # but also after the very last iteration. so we loop for step <= num_iterations # instead of just < num_iterations (one extra due to <=), only to do # the validation/sampling one last time, and then we break right here as we're done. if last_step: break # --------------- TRAINING SECTION BEGIN ----------------- model.train() for i in range(1, train_accumulation_steps+1): # forward pass with ctx: _, loss = model(x, y, return_logits=False) train_loss = loss.detach() # advance the dataset for the next batch x, y = train_loader.next_batch() # backward pass if i < train_accumulation_steps: with model.no_sync(): # there's no need to sync gradients every accumulation step loss.backward() else: loss.backward() # just sync on the last step for p in model.parameters(): p.grad /= train_accumulation_steps # step the optimizers and schedulers for opt, sched in zip(optimizers, schedulers): opt.step() sched.step() # null the gradients model.zero_grad(set_to_none=True) # --------------- TRAINING SECTION END ------------------- # everything that follows now is just diagnostics, prints, logging, etc. #dist.all_reduce(train_loss, op=dist.ReduceOp.AVG) # all-reducing the training loss would be more correct in terms of logging, but slower if master_process: approx_time = training_time_ms + 1000 * (time.time() - t0) print(f"step:{step+1}/{args.num_iterations} train_loss:{train_loss.item():.4f} train_time:{approx_time:.0f}ms step_avg:{approx_time/timed_steps:.2f}ms") with open(logfile, "a") as f: f.write(f"step:{step+1}/{args.num_iterations} train_loss:{train_loss.item():.4f} train_time:{approx_time:.0f}ms step_avg:{approx_time/timed_steps:.2f}ms\n") if master_process: print(f"peak memory consumption: {torch.cuda.max_memory_allocated() // 1024 // 1024} MiB") # ------------------------------------------------------------------------- # clean up nice dist.destroy_process_group() ==================================================================================================== Running pytorch 2.5.1+cu124 compiled for CUDA 12.4 nvidia-smi: Wed Nov 6 19:21:48 2024 +-----------------------------------------------------------------------------------------+ | NVIDIA-SMI 555.42.06 Driver Version: 555.42.06 CUDA Version: 12.5 | |-----------------------------------------+------------------------+----------------------+ | GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC | | Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. | | | | MIG M. | |=========================================+========================+======================| | 0 NVIDIA H100 80GB HBM3 Off | 00000000:18:00.0 Off | 0 | | N/A 34C P0 141W / 700W | 5304MiB / 81559MiB | 6% Default | | | | Disabled | +-----------------------------------------+------------------------+----------------------+ | 1 NVIDIA H100 80GB HBM3 Off | 00000000:2A:00.0 Off | 0 | | N/A 35C P0 132W / 700W | 5352MiB / 81559MiB | 4% Default | | | | Disabled | +-----------------------------------------+------------------------+----------------------+ | 2 NVIDIA H100 80GB HBM3 Off | 00000000:3A:00.0 Off | 0 | | N/A 36C P0 126W / 700W | 5352MiB / 81559MiB | 5% Default | | | | Disabled | +-----------------------------------------+------------------------+----------------------+ | 3 NVIDIA H100 80GB HBM3 Off | 00000000:5D:00.0 Off | 0 | | N/A 33C P0 137W / 700W | 5352MiB / 81559MiB | 4% Default | | | | Disabled | +-----------------------------------------+------------------------+----------------------+ | 4 NVIDIA H100 80GB HBM3 Off | 00000000:9A:00.0 Off | 0 | | N/A 34C P0 142W / 700W | 5352MiB / 81559MiB | 0% Default | | | | Disabled | +-----------------------------------------+------------------------+----------------------+ | 5 NVIDIA H100 80GB HBM3 Off | 00000000:AB:00.0 Off | 0 | | N/A 37C P0 143W / 700W | 5352MiB / 81559MiB | 5% Default | | | | Disabled | +-----------------------------------------+------------------------+----------------------+ | 6 NVIDIA H100 80GB HBM3 Off | 00000000:BA:00.0 Off | 0 | | N/A 36C P0 143W / 700W | 5352MiB / 81559MiB | 3% Default | | | | Disabled | +-----------------------------------------+------------------------+----------------------+ | 7 NVIDIA H100 80GB HBM3 Off | 00000000:DB:00.0 Off | 0 | | N/A 35C P0 147W / 700W | 5112MiB / 81559MiB | 9% Default | | | | Disabled | +-----------------------------------------+------------------------+----------------------+ +-----------------------------------------------------------------------------------------+ | Processes: | | GPU GI CI PID Type Process name GPU Memory | | ID ID Usage | |=========================================================================================| | 0 N/A N/A 18921 C /usr/bin/python3 0MiB | | 1 N/A N/A 18922 C /usr/bin/python3 0MiB | | 2 N/A N/A 18923 C /usr/bin/python3 0MiB | | 3 N/A N/A 18924 C /usr/bin/python3 0MiB | | 4 N/A N/A 18925 C /usr/bin/python3 0MiB | | 5 N/A N/A 18926 C /usr/bin/python3 0MiB | | 6 N/A N/A 18927 C /usr/bin/python3 0MiB | | 7 N/A N/A 18928 C /usr/bin/python3 0MiB | +-----------------------------------------------------------------------------------------+ ==================================================================================================== step:0/3200 val_loss:10.8258 train_time:327ms step_avg:nanms step:1/3200 train_loss:10.8258 train_time:12839ms step_avg:nanms step:2/3200 train_loss:10.4256 train_time:12924ms step_avg:nanms step:3/3200 train_loss:9.9096 train_time:13058ms step_avg:nanms step:4/3200 train_loss:8.9988 train_time:13194ms step_avg:nanms step:5/3200 train_loss:8.0727 train_time:13331ms step_avg:nanms step:6/3200 train_loss:7.5462 train_time:13469ms step_avg:nanms step:7/3200 train_loss:7.0378 train_time:13607ms step_avg:nanms step:8/3200 train_loss:7.1942 train_time:13746ms step_avg:nanms step:9/3200 train_loss:6.9001 train_time:13892ms step_avg:nanms step:10/3200 train_loss:6.7542 train_time:14036ms step_avg:nanms step:11/3200 train_loss:6.7234 train_time:84ms step_avg:nanms step:12/3200 train_loss:6.6775 train_time:222ms step_avg:nanms step:13/3200 train_loss:6.5481 train_time:360ms step_avg:119.99ms step:14/3200 train_loss:6.5219 train_time:498ms step_avg:124.52ms step:15/3200 train_loss:6.4966 train_time:637ms step_avg:127.44ms step:16/3200 train_loss:6.4492 train_time:782ms step_avg:130.26ms step:17/3200 train_loss:6.4627 train_time:927ms step_avg:132.42ms step:18/3200 train_loss:6.5041 train_time:1065ms step_avg:133.13ms step:19/3200 train_loss:6.3395 train_time:1206ms step_avg:134.03ms step:20/3200 train_loss:6.3648 train_time:1346ms step_avg:134.58ms step:21/3200 train_loss:6.0618 train_time:1483ms step_avg:134.81ms step:22/3200 train_loss:6.4044 train_time:1624ms step_avg:135.35ms step:23/3200 train_loss:6.6241 train_time:1764ms step_avg:135.68ms step:24/3200 train_loss:6.2768 train_time:1904ms step_avg:136.01ms step:25/3200 train_loss:6.4352 train_time:2053ms step_avg:136.88ms step:26/3200 train_loss:6.1368 train_time:2190ms step_avg:136.85ms step:27/3200 train_loss:6.0519 train_time:2327ms step_avg:136.89ms step:28/3200 train_loss:6.2256 train_time:2466ms step_avg:137.00ms step:29/3200 train_loss:5.8967 train_time:2608ms step_avg:137.28ms step:30/3200 train_loss:6.1485 train_time:2750ms step_avg:137.50ms step:31/3200 train_loss:5.9977 train_time:2890ms step_avg:137.63ms step:32/3200 train_loss:5.9604 train_time:3032ms step_avg:137.81ms step:33/3200 train_loss:5.7967 train_time:3168ms step_avg:137.74ms step:34/3200 train_loss:6.1126 train_time:3309ms step_avg:137.86ms step:35/3200 train_loss:6.0078 train_time:3449ms step_avg:137.94ms step:36/3200 train_loss:6.1453 train_time:3588ms step_avg:138.00ms step:37/3200 train_loss:6.0698 train_time:3728ms step_avg:138.08ms step:38/3200 train_loss:5.9753 train_time:3869ms step_avg:138.18ms step:39/3200 train_loss:5.8623 train_time:4011ms step_avg:138.29ms step:40/3200 train_loss:5.8892 train_time:4153ms step_avg:138.42ms step:41/3200 train_loss:5.8043 train_time:4291ms step_avg:138.42ms step:42/3200 train_loss:5.8027 train_time:4432ms step_avg:138.49ms step:43/3200 train_loss:5.7167 train_time:4572ms step_avg:138.55ms step:44/3200 train_loss:5.7961 train_time:4715ms step_avg:138.67ms step:45/3200 train_loss:5.7782 train_time:4851ms step_avg:138.61ms step:46/3200 train_loss:5.9187 train_time:4995ms step_avg:138.75ms step:47/3200 train_loss:5.7141 train_time:5134ms step_avg:138.75ms step:48/3200 train_loss:5.5930 train_time:5276ms step_avg:138.85ms step:49/3200 train_loss:5.7894 train_time:5418ms step_avg:138.91ms step:50/3200 train_loss:5.6604 train_time:5559ms step_avg:138.98ms step:51/3200 train_loss:5.8115 train_time:5702ms step_avg:139.08ms step:52/3200 train_loss:5.6794 train_time:5840ms step_avg:139.05ms step:53/3200 train_loss:5.5397 train_time:5981ms step_avg:139.09ms step:54/3200 train_loss:5.6535 train_time:6122ms step_avg:139.13ms step:55/3200 train_loss:5.5438 train_time:6263ms step_avg:139.17ms step:56/3200 train_loss:5.8734 train_time:6403ms step_avg:139.20ms step:57/3200 train_loss:5.5220 train_time:6543ms step_avg:139.21ms step:58/3200 train_loss:5.4054 train_time:6683ms step_avg:139.23ms step:59/3200 train_loss:5.5566 train_time:6824ms step_avg:139.26ms step:60/3200 train_loss:5.5256 train_time:6964ms step_avg:139.27ms step:61/3200 train_loss:5.6125 train_time:7104ms step_avg:139.29ms step:62/3200 train_loss:5.3767 train_time:7245ms step_avg:139.32ms step:63/3200 train_loss:5.4739 train_time:7385ms step_avg:139.34ms step:64/3200 train_loss:5.4333 train_time:7525ms step_avg:139.35ms step:65/3200 train_loss:5.2116 train_time:7665ms step_avg:139.37ms step:66/3200 train_loss:5.2612 train_time:7806ms step_avg:139.40ms step:67/3200 train_loss:5.4204 train_time:7946ms step_avg:139.41ms step:68/3200 train_loss:5.2860 train_time:8086ms step_avg:139.42ms step:69/3200 train_loss:5.5218 train_time:8227ms step_avg:139.43ms step:70/3200 train_loss:5.1801 train_time:8366ms step_avg:139.44ms step:71/3200 train_loss:5.2653 train_time:8507ms step_avg:139.46ms step:72/3200 train_loss:5.4208 train_time:8648ms step_avg:139.48ms step:73/3200 train_loss:5.3603 train_time:8787ms step_avg:139.48ms step:74/3200 train_loss:5.2396 train_time:8928ms step_avg:139.50ms step:75/3200 train_loss:5.3471 train_time:9068ms step_avg:139.51ms step:76/3200 train_loss:5.3336 train_time:9209ms step_avg:139.53ms step:77/3200 train_loss:5.2690 train_time:9349ms step_avg:139.54ms step:78/3200 train_loss:5.3685 train_time:9491ms step_avg:139.57ms step:79/3200 train_loss:5.4577 train_time:9631ms step_avg:139.58ms step:80/3200 train_loss:5.2244 train_time:9774ms step_avg:139.63ms step:81/3200 train_loss:5.3213 train_time:9913ms step_avg:139.62ms step:82/3200 train_loss:5.0849 train_time:10054ms step_avg:139.64ms step:83/3200 train_loss:5.2597 train_time:10195ms step_avg:139.66ms step:84/3200 train_loss:5.2095 train_time:10336ms step_avg:139.67ms step:85/3200 train_loss:5.2016 train_time:10477ms step_avg:139.70ms step:86/3200 train_loss:5.0742 train_time:10618ms step_avg:139.71ms step:87/3200 train_loss:5.2658 train_time:10760ms step_avg:139.74ms step:88/3200 train_loss:5.1765 train_time:10901ms step_avg:139.76ms step:89/3200 train_loss:5.2228 train_time:11040ms step_avg:139.75ms step:90/3200 train_loss:5.1969 train_time:11182ms step_avg:139.78ms step:91/3200 train_loss:5.0963 train_time:11323ms step_avg:139.79ms step:92/3200 train_loss:5.1057 train_time:11464ms step_avg:139.80ms step:93/3200 train_loss:5.2301 train_time:11606ms step_avg:139.83ms step:94/3200 train_loss:5.0477 train_time:11744ms step_avg:139.81ms step:95/3200 train_loss:5.0507 train_time:11886ms step_avg:139.83ms step:96/3200 train_loss:5.0944 train_time:12027ms step_avg:139.85ms step:97/3200 train_loss:5.0012 train_time:12167ms step_avg:139.85ms step:98/3200 train_loss:5.0867 train_time:12308ms step_avg:139.86ms step:99/3200 train_loss:5.0070 train_time:12448ms step_avg:139.87ms step:100/3200 train_loss:5.1278 train_time:12589ms step_avg:139.88ms step:101/3200 train_loss:5.0922 train_time:12729ms step_avg:139.88ms step:102/3200 train_loss:4.9785 train_time:12871ms step_avg:139.90ms step:103/3200 train_loss:5.1059 train_time:13009ms step_avg:139.89ms step:104/3200 train_loss:5.0469 train_time:13151ms step_avg:139.90ms step:105/3200 train_loss:4.9207 train_time:13292ms step_avg:139.91ms step:106/3200 train_loss:4.9802 train_time:13433ms step_avg:139.93ms step:107/3200 train_loss:5.1744 train_time:13578ms step_avg:139.98ms step:108/3200 train_loss:4.9749 train_time:13716ms step_avg:139.96ms step:109/3200 train_loss:4.7626 train_time:13857ms step_avg:139.97ms step:110/3200 train_loss:4.9359 train_time:13999ms step_avg:139.99ms step:111/3200 train_loss:4.9183 train_time:14140ms step_avg:140.00ms step:112/3200 train_loss:4.8920 train_time:14282ms step_avg:140.02ms step:113/3200 train_loss:5.0141 train_time:14422ms step_avg:140.02ms step:114/3200 train_loss:4.9271 train_time:14562ms step_avg:140.02ms step:115/3200 train_loss:4.7846 train_time:14703ms step_avg:140.03ms step:116/3200 train_loss:4.9287 train_time:14843ms step_avg:140.02ms step:117/3200 train_loss:4.8456 train_time:14984ms step_avg:140.04ms step:118/3200 train_loss:4.8066 train_time:15125ms step_avg:140.05ms step:119/3200 train_loss:4.9569 train_time:15266ms step_avg:140.05ms step:120/3200 train_loss:4.9012 train_time:15407ms step_avg:140.06ms step:121/3200 train_loss:4.8250 train_time:15546ms step_avg:140.05ms step:122/3200 train_loss:4.7344 train_time:15685ms step_avg:140.05ms step:123/3200 train_loss:4.8508 train_time:15825ms step_avg:140.05ms step:124/3200 train_loss:4.7128 train_time:15965ms step_avg:140.04ms step:125/3200 train_loss:5.0085 train_time:16105ms step_avg:140.05ms step:125/3200 val_loss:4.8315 train_time:16160ms step_avg:140.52ms step:126/3200 train_loss:4.8744 train_time:16259ms step_avg:140.16ms step:127/3200 train_loss:4.8336 train_time:16402ms step_avg:140.18ms step:128/3200 train_loss:4.8889 train_time:16542ms step_avg:140.19ms step:129/3200 train_loss:4.7640 train_time:16681ms step_avg:140.18ms step:130/3200 train_loss:5.0670 train_time:16820ms step_avg:140.17ms step:131/3200 train_loss:4.8109 train_time:16961ms step_avg:140.17ms step:132/3200 train_loss:4.8148 train_time:17100ms step_avg:140.16ms step:133/3200 train_loss:4.7710 train_time:17244ms step_avg:140.20ms step:134/3200 train_loss:4.8111 train_time:17390ms step_avg:140.24ms step:135/3200 train_loss:4.7108 train_time:17530ms step_avg:140.24ms step:136/3200 train_loss:4.8281 train_time:17670ms step_avg:140.24ms step:137/3200 train_loss:4.6171 train_time:17810ms step_avg:140.23ms step:138/3200 train_loss:4.7686 train_time:17949ms step_avg:140.23ms step:139/3200 train_loss:4.7201 train_time:18089ms step_avg:140.22ms step:140/3200 train_loss:4.7518 train_time:18230ms step_avg:140.23ms step:141/3200 train_loss:4.8089 train_time:18372ms step_avg:140.25ms step:142/3200 train_loss:4.6914 train_time:18514ms step_avg:140.25ms step:143/3200 train_loss:4.7455 train_time:18654ms step_avg:140.25ms step:144/3200 train_loss:4.6077 train_time:18794ms step_avg:140.26ms step:145/3200 train_loss:4.7259 train_time:18933ms step_avg:140.25ms step:146/3200 train_loss:4.6869 train_time:19074ms step_avg:140.25ms step:147/3200 train_loss:4.5681 train_time:19217ms step_avg:140.27ms step:148/3200 train_loss:4.7210 train_time:19356ms step_avg:140.26ms step:149/3200 train_loss:4.7155 train_time:19497ms step_avg:140.26ms step:150/3200 train_loss:4.7266 train_time:19638ms step_avg:140.27ms step:151/3200 train_loss:4.7790 train_time:19780ms step_avg:140.28ms step:152/3200 train_loss:4.6486 train_time:19920ms step_avg:140.28ms step:153/3200 train_loss:4.6406 train_time:20061ms step_avg:140.28ms step:154/3200 train_loss:4.7269 train_time:20201ms step_avg:140.29ms step:155/3200 train_loss:4.6928 train_time:20345ms step_avg:140.31ms step:156/3200 train_loss:4.6342 train_time:20486ms step_avg:140.31ms step:157/3200 train_loss:4.6738 train_time:20628ms step_avg:140.32ms step:158/3200 train_loss:4.7810 train_time:20768ms step_avg:140.33ms step:159/3200 train_loss:4.5940 train_time:20908ms step_avg:140.32ms step:160/3200 train_loss:4.6554 train_time:21049ms step_avg:140.33ms step:161/3200 train_loss:4.4760 train_time:21189ms step_avg:140.33ms step:162/3200 train_loss:4.6602 train_time:21330ms step_avg:140.33ms step:163/3200 train_loss:4.6837 train_time:21470ms step_avg:140.33ms step:164/3200 train_loss:4.6828 train_time:21616ms step_avg:140.36ms step:165/3200 train_loss:4.4966 train_time:21751ms step_avg:140.33ms step:166/3200 train_loss:4.6060 train_time:21892ms step_avg:140.33ms step:167/3200 train_loss:4.7432 train_time:22031ms step_avg:140.32ms step:168/3200 train_loss:4.5233 train_time:22171ms step_avg:140.32ms step:169/3200 train_loss:4.6016 train_time:22311ms step_avg:140.32ms step:170/3200 train_loss:4.4820 train_time:22451ms step_avg:140.32ms step:171/3200 train_loss:4.3801 train_time:22592ms step_avg:140.32ms step:172/3200 train_loss:4.5198 train_time:22732ms step_avg:140.32ms step:173/3200 train_loss:4.5096 train_time:22872ms step_avg:140.32ms step:174/3200 train_loss:4.5556 train_time:23012ms step_avg:140.31ms step:175/3200 train_loss:4.7233 train_time:23152ms step_avg:140.31ms step:176/3200 train_loss:4.5588 train_time:23291ms step_avg:140.31ms step:177/3200 train_loss:4.4126 train_time:23431ms step_avg:140.31ms step:178/3200 train_loss:4.3867 train_time:23571ms step_avg:140.30ms step:179/3200 train_loss:4.4638 train_time:23712ms step_avg:140.31ms step:180/3200 train_loss:4.4506 train_time:23852ms step_avg:140.31ms step:181/3200 train_loss:4.4372 train_time:23992ms step_avg:140.31ms step:182/3200 train_loss:4.5815 train_time:24132ms step_avg:140.30ms step:183/3200 train_loss:4.4457 train_time:24271ms step_avg:140.30ms step:184/3200 train_loss:4.4227 train_time:24412ms step_avg:140.30ms step:185/3200 train_loss:4.4015 train_time:24552ms step_avg:140.30ms step:186/3200 train_loss:4.5163 train_time:24693ms step_avg:140.30ms step:187/3200 train_loss:4.4405 train_time:24833ms step_avg:140.30ms step:188/3200 train_loss:4.5908 train_time:24974ms step_avg:140.31ms step:189/3200 train_loss:4.4565 train_time:25272ms step_avg:141.19ms step:190/3200 train_loss:4.3900 train_time:25583ms step_avg:142.13ms step:191/3200 train_loss:4.5000 train_time:25718ms step_avg:142.09ms step:192/3200 train_loss:4.3594 train_time:25855ms step_avg:142.06ms step:193/3200 train_loss:4.2908 train_time:25994ms step_avg:142.05ms step:194/3200 train_loss:4.5214 train_time:26132ms step_avg:142.02ms step:195/3200 train_loss:4.4313 train_time:26271ms step_avg:142.00ms step:196/3200 train_loss:4.6321 train_time:26409ms step_avg:141.99ms step:197/3200 train_loss:4.4658 train_time:26554ms step_avg:142.00ms step:198/3200 train_loss:4.3205 train_time:26696ms step_avg:142.00ms step:199/3200 train_loss:4.4234 train_time:26836ms step_avg:141.99ms step:200/3200 train_loss:4.2911 train_time:26975ms step_avg:141.97ms step:201/3200 train_loss:4.3793 train_time:27114ms step_avg:141.96ms step:202/3200 train_loss:4.2709 train_time:27253ms step_avg:141.94ms step:203/3200 train_loss:4.5004 train_time:27394ms step_avg:141.94ms step:204/3200 train_loss:4.3394 train_time:27536ms step_avg:141.94ms step:205/3200 train_loss:4.4319 train_time:27677ms step_avg:141.93ms step:206/3200 train_loss:4.5069 train_time:27818ms step_avg:141.93ms step:207/3200 train_loss:4.1940 train_time:27957ms step_avg:141.91ms step:208/3200 train_loss:4.3371 train_time:28097ms step_avg:141.90ms step:209/3200 train_loss:4.3271 train_time:28235ms step_avg:141.89ms step:210/3200 train_loss:4.4819 train_time:28381ms step_avg:141.91ms step:211/3200 train_loss:4.4218 train_time:28516ms step_avg:141.87ms step:212/3200 train_loss:4.3033 train_time:28657ms step_avg:141.87ms step:213/3200 train_loss:4.3513 train_time:28797ms step_avg:141.86ms step:214/3200 train_loss:4.2723 train_time:28938ms step_avg:141.85ms step:215/3200 train_loss:4.3516 train_time:29078ms step_avg:141.84ms step:216/3200 train_loss:4.1748 train_time:29219ms step_avg:141.84ms step:217/3200 train_loss:4.2557 train_time:29359ms step_avg:141.83ms step:218/3200 train_loss:4.2538 train_time:29499ms step_avg:141.82ms step:219/3200 train_loss:4.3058 train_time:29641ms step_avg:141.82ms step:220/3200 train_loss:4.3025 train_time:29782ms step_avg:141.82ms step:221/3200 train_loss:4.3186 train_time:29923ms step_avg:141.81ms step:222/3200 train_loss:4.3379 train_time:30064ms step_avg:141.81ms step:223/3200 train_loss:4.2629 train_time:30204ms step_avg:141.80ms step:224/3200 train_loss:4.2214 train_time:30345ms step_avg:141.80ms step:225/3200 train_loss:4.4968 train_time:30485ms step_avg:141.79ms step:226/3200 train_loss:4.1167 train_time:30625ms step_avg:141.78ms step:227/3200 train_loss:4.2040 train_time:30767ms step_avg:141.78ms step:228/3200 train_loss:4.2082 train_time:30909ms step_avg:141.79ms step:229/3200 train_loss:4.3550 train_time:31047ms step_avg:141.77ms step:230/3200 train_loss:4.1476 train_time:31187ms step_avg:141.76ms step:231/3200 train_loss:4.2696 train_time:31326ms step_avg:141.75ms step:232/3200 train_loss:4.1369 train_time:31467ms step_avg:141.74ms step:233/3200 train_loss:4.1843 train_time:31608ms step_avg:141.74ms step:234/3200 train_loss:4.3320 train_time:31748ms step_avg:141.73ms step:235/3200 train_loss:4.2285 train_time:31889ms step_avg:141.73ms step:236/3200 train_loss:4.1281 train_time:32027ms step_avg:141.71ms step:237/3200 train_loss:4.2951 train_time:32167ms step_avg:141.71ms step:238/3200 train_loss:4.2912 train_time:32308ms step_avg:141.70ms step:239/3200 train_loss:4.1544 train_time:32448ms step_avg:141.69ms step:240/3200 train_loss:4.3048 train_time:32588ms step_avg:141.69ms step:241/3200 train_loss:4.3258 train_time:32727ms step_avg:141.68ms step:242/3200 train_loss:4.1828 train_time:32867ms step_avg:141.67ms step:243/3200 train_loss:4.3595 train_time:33008ms step_avg:141.66ms step:244/3200 train_loss:4.2272 train_time:33148ms step_avg:141.66ms step:245/3200 train_loss:4.2774 train_time:33288ms step_avg:141.65ms step:246/3200 train_loss:4.3332 train_time:33429ms step_avg:141.65ms step:247/3200 train_loss:4.2716 train_time:33569ms step_avg:141.64ms step:248/3200 train_loss:4.2076 train_time:33709ms step_avg:141.63ms step:249/3200 train_loss:4.3369 train_time:33849ms step_avg:141.63ms step:250/3200 train_loss:4.1227 train_time:33989ms step_avg:141.62ms step:250/3200 val_loss:4.2090 train_time:34044ms step_avg:141.85ms step:251/3200 train_loss:4.1627 train_time:34141ms step_avg:141.66ms step:252/3200 train_loss:4.2773 train_time:34286ms step_avg:141.68ms step:253/3200 train_loss:4.3427 train_time:34425ms step_avg:141.67ms step:254/3200 train_loss:4.1455 train_time:34565ms step_avg:141.66ms step:255/3200 train_loss:4.0963 train_time:34704ms step_avg:141.65ms step:256/3200 train_loss:4.2639 train_time:34843ms step_avg:141.64ms step:257/3200 train_loss:4.1827 train_time:34983ms step_avg:141.63ms step:258/3200 train_loss:4.1905 train_time:35127ms step_avg:141.64ms step:259/3200 train_loss:4.1644 train_time:35270ms step_avg:141.65ms step:260/3200 train_loss:4.2132 train_time:35411ms step_avg:141.64ms step:261/3200 train_loss:4.2487 train_time:35550ms step_avg:141.64ms step:262/3200 train_loss:4.2140 train_time:35691ms step_avg:141.63ms step:263/3200 train_loss:4.1734 train_time:35833ms step_avg:141.63ms step:264/3200 train_loss:4.0942 train_time:35973ms step_avg:141.63ms step:265/3200 train_loss:4.1735 train_time:36111ms step_avg:141.61ms step:266/3200 train_loss:4.0459 train_time:36254ms step_avg:141.62ms step:267/3200 train_loss:4.1072 train_time:36395ms step_avg:141.61ms step:268/3200 train_loss:4.1128 train_time:36534ms step_avg:141.61ms step:269/3200 train_loss:4.1323 train_time:36674ms step_avg:141.60ms step:270/3200 train_loss:4.0506 train_time:36813ms step_avg:141.59ms step:271/3200 train_loss:4.2805 train_time:36953ms step_avg:141.58ms step:272/3200 train_loss:4.1777 train_time:37093ms step_avg:141.58ms step:273/3200 train_loss:4.0971 train_time:37234ms step_avg:141.58ms step:274/3200 train_loss:4.1401 train_time:37375ms step_avg:141.57ms step:275/3200 train_loss:4.2169 train_time:37514ms step_avg:141.56ms step:276/3200 train_loss:4.2450 train_time:37654ms step_avg:141.56ms step:277/3200 train_loss:4.4132 train_time:37793ms step_avg:141.55ms step:278/3200 train_loss:4.2118 train_time:37933ms step_avg:141.54ms step:279/3200 train_loss:4.2731 train_time:38073ms step_avg:141.54ms step:280/3200 train_loss:4.1805 train_time:38213ms step_avg:141.53ms step:281/3200 train_loss:4.2877 train_time:38353ms step_avg:141.52ms step:282/3200 train_loss:4.1420 train_time:38493ms step_avg:141.52ms step:283/3200 train_loss:4.1376 train_time:38632ms step_avg:141.51ms step:284/3200 train_loss:4.0799 train_time:38772ms step_avg:141.51ms step:285/3200 train_loss:4.2343 train_time:38912ms step_avg:141.50ms step:286/3200 train_loss:4.2319 train_time:39052ms step_avg:141.49ms step:287/3200 train_loss:4.2612 train_time:39192ms step_avg:141.49ms step:288/3200 train_loss:4.0933 train_time:39334ms step_avg:141.49ms step:289/3200 train_loss:4.1977 train_time:39474ms step_avg:141.48ms step:290/3200 train_loss:4.0506 train_time:39613ms step_avg:141.48ms step:291/3200 train_loss:4.0441 train_time:39754ms step_avg:141.47ms step:292/3200 train_loss:4.1217 train_time:39893ms step_avg:141.47ms step:293/3200 train_loss:4.0471 train_time:40033ms step_avg:141.46ms step:294/3200 train_loss:4.0926 train_time:40174ms step_avg:141.46ms step:295/3200 train_loss:4.1378 train_time:40314ms step_avg:141.45ms step:296/3200 train_loss:4.0157 train_time:40455ms step_avg:141.45ms step:297/3200 train_loss:4.0228 train_time:40596ms step_avg:141.45ms step:298/3200 train_loss:4.0300 train_time:40735ms step_avg:141.44ms step:299/3200 train_loss:4.1337 train_time:40874ms step_avg:141.43ms step:300/3200 train_loss:4.0086 train_time:41014ms step_avg:141.43ms step:301/3200 train_loss:4.1422 train_time:41155ms step_avg:141.42ms step:302/3200 train_loss:4.1514 train_time:41294ms step_avg:141.42ms step:303/3200 train_loss:4.1008 train_time:41434ms step_avg:141.41ms step:304/3200 train_loss:4.1508 train_time:41573ms step_avg:141.41ms step:305/3200 train_loss:4.1293 train_time:41712ms step_avg:141.40ms step:306/3200 train_loss:4.6148 train_time:41853ms step_avg:141.39ms step:307/3200 train_loss:4.1007 train_time:41993ms step_avg:141.39ms step:308/3200 train_loss:4.0087 train_time:42133ms step_avg:141.39ms step:309/3200 train_loss:4.1613 train_time:42273ms step_avg:141.38ms step:310/3200 train_loss:4.0229 train_time:42413ms step_avg:141.38ms step:311/3200 train_loss:4.2499 train_time:42554ms step_avg:141.38ms step:312/3200 train_loss:4.0998 train_time:42694ms step_avg:141.37ms step:313/3200 train_loss:4.0389 train_time:42836ms step_avg:141.37ms step:314/3200 train_loss:4.1299 train_time:42973ms step_avg:141.36ms step:315/3200 train_loss:4.2539 train_time:43112ms step_avg:141.35ms step:316/3200 train_loss:4.1180 train_time:43253ms step_avg:141.35ms step:317/3200 train_loss:3.9619 train_time:43394ms step_avg:141.35ms step:318/3200 train_loss:4.0369 train_time:43534ms step_avg:141.34ms step:319/3200 train_loss:4.0716 train_time:43674ms step_avg:141.34ms step:320/3200 train_loss:4.0452 train_time:43814ms step_avg:141.34ms step:321/3200 train_loss:4.1594 train_time:43954ms step_avg:141.33ms step:322/3200 train_loss:4.1111 train_time:44093ms step_avg:141.32ms step:323/3200 train_loss:4.0920 train_time:44232ms step_avg:141.32ms step:324/3200 train_loss:4.1775 train_time:44373ms step_avg:141.32ms step:325/3200 train_loss:4.1273 train_time:44513ms step_avg:141.31ms step:326/3200 train_loss:4.1996 train_time:44655ms step_avg:141.31ms step:327/3200 train_loss:4.0591 train_time:44795ms step_avg:141.31ms step:328/3200 train_loss:4.5598 train_time:44933ms step_avg:141.30ms step:329/3200 train_loss:4.2347 train_time:45073ms step_avg:141.29ms step:330/3200 train_loss:3.9734 train_time:45211ms step_avg:141.29ms step:331/3200 train_loss:3.9183 train_time:45353ms step_avg:141.29ms step:332/3200 train_loss:4.1372 train_time:45493ms step_avg:141.28ms step:333/3200 train_loss:4.0649 train_time:45633ms step_avg:141.28ms step:334/3200 train_loss:4.0417 train_time:45773ms step_avg:141.28ms step:335/3200 train_loss:4.0037 train_time:45914ms step_avg:141.27ms step:336/3200 train_loss:4.1763 train_time:46054ms step_avg:141.27ms step:337/3200 train_loss:4.1179 train_time:46193ms step_avg:141.26ms step:338/3200 train_loss:4.5985 train_time:46333ms step_avg:141.26ms step:339/3200 train_loss:4.0958 train_time:46473ms step_avg:141.25ms step:340/3200 train_loss:4.0443 train_time:46612ms step_avg:141.25ms step:341/3200 train_loss:4.0923 train_time:46753ms step_avg:141.25ms step:342/3200 train_loss:4.0065 train_time:46893ms step_avg:141.24ms step:343/3200 train_loss:3.9705 train_time:47032ms step_avg:141.24ms step:344/3200 train_loss:4.0188 train_time:47172ms step_avg:141.23ms step:345/3200 train_loss:4.1471 train_time:47311ms step_avg:141.23ms step:346/3200 train_loss:4.0027 train_time:47452ms step_avg:141.23ms step:347/3200 train_loss:3.9364 train_time:47591ms step_avg:141.22ms step:348/3200 train_loss:3.9714 train_time:47731ms step_avg:141.22ms step:349/3200 train_loss:4.0200 train_time:47873ms step_avg:141.22ms step:350/3200 train_loss:3.9830 train_time:48013ms step_avg:141.21ms step:351/3200 train_loss:3.7035 train_time:48153ms step_avg:141.21ms step:352/3200 train_loss:3.9734 train_time:48294ms step_avg:141.21ms step:353/3200 train_loss:4.3205 train_time:48434ms step_avg:141.21ms step:354/3200 train_loss:3.8248 train_time:48574ms step_avg:141.20ms step:355/3200 train_loss:4.0798 train_time:48713ms step_avg:141.20ms step:356/3200 train_loss:3.9511 train_time:48853ms step_avg:141.19ms step:357/3200 train_loss:4.0570 train_time:48994ms step_avg:141.19ms step:358/3200 train_loss:3.9899 train_time:49133ms step_avg:141.19ms step:359/3200 train_loss:4.0034 train_time:49273ms step_avg:141.18ms step:360/3200 train_loss:4.0397 train_time:49413ms step_avg:141.18ms step:361/3200 train_loss:3.6182 train_time:49553ms step_avg:141.18ms step:362/3200 train_loss:4.1799 train_time:49694ms step_avg:141.17ms step:363/3200 train_loss:4.0822 train_time:49834ms step_avg:141.17ms step:364/3200 train_loss:3.9986 train_time:49974ms step_avg:141.17ms step:365/3200 train_loss:3.9028 train_time:50114ms step_avg:141.17ms step:366/3200 train_loss:4.0737 train_time:50254ms step_avg:141.16ms step:367/3200 train_loss:4.0264 train_time:50393ms step_avg:141.16ms step:368/3200 train_loss:4.0150 train_time:50533ms step_avg:141.15ms step:369/3200 train_loss:3.9964 train_time:50673ms step_avg:141.15ms step:370/3200 train_loss:3.8995 train_time:50812ms step_avg:141.14ms step:371/3200 train_loss:4.0427 train_time:50954ms step_avg:141.15ms step:372/3200 train_loss:3.9187 train_time:51094ms step_avg:141.14ms step:373/3200 train_loss:3.8565 train_time:51234ms step_avg:141.14ms step:374/3200 train_loss:4.0787 train_time:51375ms step_avg:141.14ms step:375/3200 train_loss:3.9925 train_time:51514ms step_avg:141.14ms step:375/3200 val_loss:3.9899 train_time:51570ms step_avg:141.29ms step:376/3200 train_loss:3.9660 train_time:51665ms step_avg:141.16ms step:377/3200 train_loss:4.0267 train_time:51810ms step_avg:141.17ms step:378/3200 train_loss:3.9441 train_time:52117ms step_avg:141.62ms step:379/3200 train_loss:4.0014 train_time:52254ms step_avg:141.61ms step:380/3200 train_loss:4.0299 train_time:52558ms step_avg:142.05ms step:381/3200 train_loss:4.0990 train_time:52696ms step_avg:142.04ms step:382/3200 train_loss:4.0061 train_time:52834ms step_avg:142.03ms step:383/3200 train_loss:3.9775 train_time:52973ms step_avg:142.02ms step:384/3200 train_loss:3.9442 train_time:53112ms step_avg:142.01ms step:385/3200 train_loss:4.0296 train_time:53250ms step_avg:142.00ms step:386/3200 train_loss:3.9449 train_time:53390ms step_avg:141.99ms step:387/3200 train_loss:4.0509 train_time:53538ms step_avg:142.01ms step:388/3200 train_loss:4.2444 train_time:53680ms step_avg:142.01ms step:389/3200 train_loss:3.9537 train_time:53820ms step_avg:142.00ms step:390/3200 train_loss:3.9445 train_time:53959ms step_avg:142.00ms step:391/3200 train_loss:4.0474 train_time:54098ms step_avg:141.99ms step:392/3200 train_loss:3.9767 train_time:54238ms step_avg:141.98ms step:393/3200 train_loss:4.0778 train_time:54378ms step_avg:141.98ms step:394/3200 train_loss:3.9166 train_time:54522ms step_avg:141.98ms step:395/3200 train_loss:4.0485 train_time:54663ms step_avg:141.98ms step:396/3200 train_loss:3.7930 train_time:54803ms step_avg:141.98ms step:397/3200 train_loss:3.9937 train_time:54944ms step_avg:141.97ms step:398/3200 train_loss:4.0410 train_time:55083ms step_avg:141.97ms step:399/3200 train_loss:4.0425 train_time:55225ms step_avg:141.97ms step:400/3200 train_loss:3.9417 train_time:55366ms step_avg:141.96ms step:401/3200 train_loss:4.0016 train_time:55511ms step_avg:141.97ms step:402/3200 train_loss:4.0692 train_time:55644ms step_avg:141.95ms step:403/3200 train_loss:3.9999 train_time:55786ms step_avg:141.95ms step:404/3200 train_loss:4.1094 train_time:55926ms step_avg:141.94ms step:405/3200 train_loss:3.8634 train_time:56066ms step_avg:141.94ms step:406/3200 train_loss:3.9503 train_time:56206ms step_avg:141.93ms step:407/3200 train_loss:4.2462 train_time:56346ms step_avg:141.93ms step:408/3200 train_loss:3.9483 train_time:56486ms step_avg:141.93ms step:409/3200 train_loss:3.9746 train_time:56627ms step_avg:141.92ms step:410/3200 train_loss:4.0189 train_time:56768ms step_avg:141.92ms step:411/3200 train_loss:3.9016 train_time:56909ms step_avg:141.92ms step:412/3200 train_loss:3.9217 train_time:57050ms step_avg:141.91ms step:413/3200 train_loss:4.3474 train_time:57189ms step_avg:141.91ms step:414/3200 train_loss:3.8004 train_time:57330ms step_avg:141.91ms step:415/3200 train_loss:4.1675 train_time:57470ms step_avg:141.90ms step:416/3200 train_loss:3.9174 train_time:57612ms step_avg:141.90ms step:417/3200 train_loss:3.9233 train_time:57753ms step_avg:141.90ms step:418/3200 train_loss:4.1078 train_time:57893ms step_avg:141.89ms step:419/3200 train_loss:3.8492 train_time:58036ms step_avg:141.90ms step:420/3200 train_loss:3.9634 train_time:58176ms step_avg:141.89ms step:421/3200 train_loss:3.8777 train_time:58317ms step_avg:141.89ms step:422/3200 train_loss:3.8041 train_time:58457ms step_avg:141.89ms step:423/3200 train_loss:3.9320 train_time:58597ms step_avg:141.88ms step:424/3200 train_loss:4.0194 train_time:58741ms step_avg:141.89ms step:425/3200 train_loss:3.7792 train_time:58878ms step_avg:141.87ms step:426/3200 train_loss:3.9574 train_time:59020ms step_avg:141.87ms step:427/3200 train_loss:3.8484 train_time:59159ms step_avg:141.87ms step:428/3200 train_loss:4.0554 train_time:59299ms step_avg:141.86ms step:429/3200 train_loss:3.9811 train_time:59440ms step_avg:141.86ms step:430/3200 train_loss:3.9141 train_time:59580ms step_avg:141.86ms step:431/3200 train_loss:3.8854 train_time:59721ms step_avg:141.85ms step:432/3200 train_loss:3.7917 train_time:59861ms step_avg:141.85ms step:433/3200 train_loss:3.9185 train_time:60000ms step_avg:141.84ms step:434/3200 train_loss:3.9738 train_time:60140ms step_avg:141.84ms step:435/3200 train_loss:3.9340 train_time:60281ms step_avg:141.84ms step:436/3200 train_loss:3.9686 train_time:60420ms step_avg:141.83ms step:437/3200 train_loss:3.9880 train_time:60560ms step_avg:141.83ms step:438/3200 train_loss:3.8685 train_time:60701ms step_avg:141.82ms step:439/3200 train_loss:3.8767 train_time:60841ms step_avg:141.82ms step:440/3200 train_loss:3.8627 train_time:60980ms step_avg:141.82ms step:441/3200 train_loss:4.0529 train_time:61120ms step_avg:141.81ms step:442/3200 train_loss:3.9279 train_time:61259ms step_avg:141.80ms step:443/3200 train_loss:3.9009 train_time:61402ms step_avg:141.81ms step:444/3200 train_loss:3.7996 train_time:61540ms step_avg:141.80ms step:445/3200 train_loss:4.0736 train_time:61680ms step_avg:141.79ms step:446/3200 train_loss:3.9992 train_time:61820ms step_avg:141.79ms step:447/3200 train_loss:3.9809 train_time:61961ms step_avg:141.79ms step:448/3200 train_loss:3.9043 train_time:62101ms step_avg:141.78ms step:449/3200 train_loss:4.0093 train_time:62240ms step_avg:141.78ms step:450/3200 train_loss:3.8397 train_time:62380ms step_avg:141.77ms step:451/3200 train_loss:3.8744 train_time:62520ms step_avg:141.77ms step:452/3200 train_loss:3.7429 train_time:62660ms step_avg:141.76ms step:453/3200 train_loss:3.8656 train_time:62799ms step_avg:141.76ms step:454/3200 train_loss:3.8315 train_time:62940ms step_avg:141.76ms step:455/3200 train_loss:3.7950 train_time:63081ms step_avg:141.75ms step:456/3200 train_loss:4.0025 train_time:63220ms step_avg:141.75ms step:457/3200 train_loss:3.8880 train_time:63360ms step_avg:141.75ms step:458/3200 train_loss:3.9594 train_time:63499ms step_avg:141.74ms step:459/3200 train_loss:3.9876 train_time:63640ms step_avg:141.74ms step:460/3200 train_loss:3.7977 train_time:63779ms step_avg:141.73ms step:461/3200 train_loss:3.9644 train_time:63920ms step_avg:141.73ms step:462/3200 train_loss:3.8616 train_time:64060ms step_avg:141.73ms step:463/3200 train_loss:3.8822 train_time:64199ms step_avg:141.72ms step:464/3200 train_loss:3.9323 train_time:64340ms step_avg:141.72ms step:465/3200 train_loss:3.8780 train_time:64480ms step_avg:141.71ms step:466/3200 train_loss:3.8841 train_time:64620ms step_avg:141.71ms step:467/3200 train_loss:3.9733 train_time:64761ms step_avg:141.71ms step:468/3200 train_loss:3.9877 train_time:64900ms step_avg:141.70ms step:469/3200 train_loss:3.9611 train_time:65041ms step_avg:141.70ms step:470/3200 train_loss:3.8553 train_time:65180ms step_avg:141.70ms step:471/3200 train_loss:3.9294 train_time:65320ms step_avg:141.69ms step:472/3200 train_loss:3.9874 train_time:65461ms step_avg:141.69ms step:473/3200 train_loss:3.9294 train_time:65600ms step_avg:141.68ms step:474/3200 train_loss:3.8885 train_time:65740ms step_avg:141.68ms step:475/3200 train_loss:3.7438 train_time:65882ms step_avg:141.68ms step:476/3200 train_loss:4.1891 train_time:66021ms step_avg:141.68ms step:477/3200 train_loss:3.9282 train_time:66161ms step_avg:141.67ms step:478/3200 train_loss:3.7489 train_time:66301ms step_avg:141.67ms step:479/3200 train_loss:3.9711 train_time:66442ms step_avg:141.67ms step:480/3200 train_loss:3.9293 train_time:66581ms step_avg:141.66ms step:481/3200 train_loss:4.0748 train_time:66721ms step_avg:141.66ms step:482/3200 train_loss:3.8885 train_time:66862ms step_avg:141.66ms step:483/3200 train_loss:3.6882 train_time:67001ms step_avg:141.65ms step:484/3200 train_loss:3.9709 train_time:67141ms step_avg:141.65ms step:485/3200 train_loss:3.8274 train_time:67281ms step_avg:141.64ms step:486/3200 train_loss:3.8332 train_time:67421ms step_avg:141.64ms step:487/3200 train_loss:3.7656 train_time:67562ms step_avg:141.64ms step:488/3200 train_loss:3.8389 train_time:67701ms step_avg:141.63ms step:489/3200 train_loss:4.0349 train_time:67841ms step_avg:141.63ms step:490/3200 train_loss:3.8827 train_time:67980ms step_avg:141.63ms step:491/3200 train_loss:3.7686 train_time:68120ms step_avg:141.62ms step:492/3200 train_loss:3.7804 train_time:68260ms step_avg:141.62ms step:493/3200 train_loss:3.8918 train_time:68399ms step_avg:141.61ms step:494/3200 train_loss:3.7445 train_time:68540ms step_avg:141.61ms step:495/3200 train_loss:3.8781 train_time:68681ms step_avg:141.61ms step:496/3200 train_loss:3.8159 train_time:68820ms step_avg:141.61ms step:497/3200 train_loss:3.7014 train_time:68960ms step_avg:141.60ms step:498/3200 train_loss:3.8874 train_time:69098ms step_avg:141.59ms step:499/3200 train_loss:3.9735 train_time:69238ms step_avg:141.59ms step:500/3200 train_loss:3.9982 train_time:69379ms step_avg:141.59ms step:500/3200 val_loss:3.8696 train_time:69434ms step_avg:141.70ms step:501/3200 train_loss:3.9049 train_time:69530ms step_avg:141.61ms step:502/3200 train_loss:3.9650 train_time:69675ms step_avg:141.62ms step:503/3200 train_loss:3.9065 train_time:69814ms step_avg:141.61ms step:504/3200 train_loss:3.9393 train_time:69953ms step_avg:141.61ms step:505/3200 train_loss:3.8904 train_time:70091ms step_avg:141.60ms step:506/3200 train_loss:3.9884 train_time:70230ms step_avg:141.59ms step:507/3200 train_loss:3.8144 train_time:70369ms step_avg:141.59ms step:508/3200 train_loss:3.9245 train_time:70509ms step_avg:141.58ms step:509/3200 train_loss:3.9924 train_time:70653ms step_avg:141.59ms step:510/3200 train_loss:3.9339 train_time:70794ms step_avg:141.59ms step:511/3200 train_loss:3.7458 train_time:70934ms step_avg:141.59ms step:512/3200 train_loss:3.9473 train_time:71073ms step_avg:141.58ms step:513/3200 train_loss:3.8814 train_time:71212ms step_avg:141.57ms step:514/3200 train_loss:3.8448 train_time:71351ms step_avg:141.57ms step:515/3200 train_loss:3.9386 train_time:71493ms step_avg:141.57ms step:516/3200 train_loss:3.9014 train_time:71633ms step_avg:141.57ms step:517/3200 train_loss:4.2489 train_time:71774ms step_avg:141.57ms step:518/3200 train_loss:3.8522 train_time:71916ms step_avg:141.57ms step:519/3200 train_loss:3.9493 train_time:72055ms step_avg:141.56ms step:520/3200 train_loss:3.8532 train_time:72195ms step_avg:141.56ms step:521/3200 train_loss:3.8583 train_time:72335ms step_avg:141.55ms step:522/3200 train_loss:3.8098 train_time:72475ms step_avg:141.55ms step:523/3200 train_loss:3.8212 train_time:72615ms step_avg:141.55ms step:524/3200 train_loss:4.4469 train_time:72757ms step_avg:141.55ms step:525/3200 train_loss:3.9068 train_time:72899ms step_avg:141.55ms step:526/3200 train_loss:3.8499 train_time:73039ms step_avg:141.55ms step:527/3200 train_loss:3.8545 train_time:73180ms step_avg:141.55ms step:528/3200 train_loss:3.8185 train_time:73321ms step_avg:141.55ms step:529/3200 train_loss:3.7899 train_time:73462ms step_avg:141.55ms step:530/3200 train_loss:4.0062 train_time:73604ms step_avg:141.55ms step:531/3200 train_loss:3.8075 train_time:73745ms step_avg:141.55ms step:532/3200 train_loss:4.0826 train_time:73886ms step_avg:141.54ms step:533/3200 train_loss:3.8906 train_time:74027ms step_avg:141.54ms step:534/3200 train_loss:3.8222 train_time:74168ms step_avg:141.54ms step:535/3200 train_loss:3.8447 train_time:74307ms step_avg:141.54ms step:536/3200 train_loss:3.7755 train_time:74448ms step_avg:141.54ms step:537/3200 train_loss:3.9038 train_time:74587ms step_avg:141.53ms step:538/3200 train_loss:3.8962 train_time:74727ms step_avg:141.53ms step:539/3200 train_loss:3.7915 train_time:74867ms step_avg:141.53ms step:540/3200 train_loss:4.2917 train_time:75007ms step_avg:141.52ms step:541/3200 train_loss:3.8324 train_time:75148ms step_avg:141.52ms step:542/3200 train_loss:3.9460 train_time:75287ms step_avg:141.52ms step:543/3200 train_loss:3.7725 train_time:75428ms step_avg:141.52ms step:544/3200 train_loss:3.7421 train_time:75567ms step_avg:141.51ms step:545/3200 train_loss:3.8342 train_time:75707ms step_avg:141.51ms step:546/3200 train_loss:3.7590 train_time:75848ms step_avg:141.51ms step:547/3200 train_loss:3.8121 train_time:75988ms step_avg:141.50ms step:548/3200 train_loss:3.8139 train_time:76128ms step_avg:141.50ms step:549/3200 train_loss:3.7903 train_time:76267ms step_avg:141.50ms step:550/3200 train_loss:3.8874 train_time:76407ms step_avg:141.50ms step:551/3200 train_loss:3.7737 train_time:76548ms step_avg:141.49ms step:552/3200 train_loss:3.7871 train_time:76687ms step_avg:141.49ms step:553/3200 train_loss:4.1163 train_time:76827ms step_avg:141.49ms step:554/3200 train_loss:3.9138 train_time:76968ms step_avg:141.48ms step:555/3200 train_loss:3.8768 train_time:77108ms step_avg:141.48ms step:556/3200 train_loss:3.8256 train_time:77249ms step_avg:141.48ms step:557/3200 train_loss:3.8563 train_time:77388ms step_avg:141.48ms step:558/3200 train_loss:3.5314 train_time:77528ms step_avg:141.47ms step:559/3200 train_loss:3.7760 train_time:77668ms step_avg:141.47ms step:560/3200 train_loss:3.8187 train_time:77807ms step_avg:141.47ms step:561/3200 train_loss:3.8724 train_time:77948ms step_avg:141.47ms step:562/3200 train_loss:3.7754 train_time:78087ms step_avg:141.46ms step:563/3200 train_loss:3.7163 train_time:78227ms step_avg:141.46ms step:564/3200 train_loss:3.9203 train_time:78368ms step_avg:141.46ms step:565/3200 train_loss:3.7323 train_time:78508ms step_avg:141.46ms step:566/3200 train_loss:3.8536 train_time:78648ms step_avg:141.45ms step:567/3200 train_loss:3.7947 train_time:78955ms step_avg:141.75ms step:568/3200 train_loss:3.7627 train_time:79093ms step_avg:141.74ms step:569/3200 train_loss:3.8489 train_time:79232ms step_avg:141.74ms step:570/3200 train_loss:3.8145 train_time:79548ms step_avg:142.05ms step:571/3200 train_loss:3.8492 train_time:79686ms step_avg:142.04ms step:572/3200 train_loss:3.9361 train_time:79825ms step_avg:142.04ms step:573/3200 train_loss:3.8848 train_time:79965ms step_avg:142.03ms step:574/3200 train_loss:3.8914 train_time:80104ms step_avg:142.03ms step:575/3200 train_loss:3.9415 train_time:80243ms step_avg:142.02ms step:576/3200 train_loss:3.8994 train_time:80382ms step_avg:142.02ms step:577/3200 train_loss:3.9209 train_time:80530ms step_avg:142.03ms step:578/3200 train_loss:3.8476 train_time:80672ms step_avg:142.03ms step:579/3200 train_loss:3.8384 train_time:80812ms step_avg:142.02ms step:580/3200 train_loss:3.8209 train_time:80952ms step_avg:142.02ms step:581/3200 train_loss:3.7618 train_time:81090ms step_avg:142.01ms step:582/3200 train_loss:3.7918 train_time:81230ms step_avg:142.01ms step:583/3200 train_loss:4.0210 train_time:81371ms step_avg:142.01ms step:584/3200 train_loss:3.7879 train_time:81511ms step_avg:142.00ms step:585/3200 train_loss:3.7503 train_time:81652ms step_avg:142.00ms step:586/3200 train_loss:3.9347 train_time:81791ms step_avg:142.00ms step:587/3200 train_loss:3.6955 train_time:81931ms step_avg:142.00ms step:588/3200 train_loss:3.8268 train_time:82071ms step_avg:141.99ms step:589/3200 train_loss:3.8163 train_time:82210ms step_avg:141.99ms step:590/3200 train_loss:4.1668 train_time:82350ms step_avg:141.98ms step:591/3200 train_loss:3.9404 train_time:82490ms step_avg:141.98ms step:592/3200 train_loss:3.6877 train_time:82631ms step_avg:141.98ms step:593/3200 train_loss:3.6996 train_time:82770ms step_avg:141.97ms step:594/3200 train_loss:3.6825 train_time:82915ms step_avg:141.98ms step:595/3200 train_loss:3.7287 train_time:83050ms step_avg:141.97ms step:596/3200 train_loss:4.0975 train_time:83190ms step_avg:141.96ms step:597/3200 train_loss:3.8180 train_time:83331ms step_avg:141.96ms step:598/3200 train_loss:3.7495 train_time:83472ms step_avg:141.96ms step:599/3200 train_loss:3.8249 train_time:83612ms step_avg:141.96ms step:600/3200 train_loss:3.6403 train_time:83753ms step_avg:141.95ms step:601/3200 train_loss:3.7663 train_time:83894ms step_avg:141.95ms step:602/3200 train_loss:3.7967 train_time:84036ms step_avg:141.95ms step:603/3200 train_loss:3.8192 train_time:84173ms step_avg:141.94ms step:604/3200 train_loss:3.9437 train_time:84312ms step_avg:141.94ms step:605/3200 train_loss:3.7963 train_time:84453ms step_avg:141.94ms step:606/3200 train_loss:3.7863 train_time:84593ms step_avg:141.93ms step:607/3200 train_loss:3.7332 train_time:84733ms step_avg:141.93ms step:608/3200 train_loss:3.9779 train_time:84874ms step_avg:141.93ms step:609/3200 train_loss:3.8136 train_time:85014ms step_avg:141.93ms step:610/3200 train_loss:3.7869 train_time:85155ms step_avg:141.93ms step:611/3200 train_loss:3.8790 train_time:85296ms step_avg:141.92ms step:612/3200 train_loss:3.7821 train_time:85435ms step_avg:141.92ms step:613/3200 train_loss:3.7693 train_time:85575ms step_avg:141.92ms step:614/3200 train_loss:3.9330 train_time:85715ms step_avg:141.91ms step:615/3200 train_loss:3.8836 train_time:85857ms step_avg:141.91ms step:616/3200 train_loss:3.8562 train_time:85998ms step_avg:141.91ms step:617/3200 train_loss:3.7792 train_time:86139ms step_avg:141.91ms step:618/3200 train_loss:3.7351 train_time:86279ms step_avg:141.91ms step:619/3200 train_loss:3.8459 train_time:86420ms step_avg:141.90ms step:620/3200 train_loss:3.7426 train_time:86563ms step_avg:141.91ms step:621/3200 train_loss:3.7628 train_time:86704ms step_avg:141.90ms step:622/3200 train_loss:4.0720 train_time:86844ms step_avg:141.90ms step:623/3200 train_loss:3.7574 train_time:86985ms step_avg:141.90ms step:624/3200 train_loss:3.7768 train_time:87125ms step_avg:141.90ms step:625/3200 train_loss:3.8631 train_time:87268ms step_avg:141.90ms step:625/3200 val_loss:3.7934 train_time:87322ms step_avg:141.99ms step:626/3200 train_loss:3.8896 train_time:87418ms step_avg:141.91ms step:627/3200 train_loss:3.9109 train_time:87564ms step_avg:141.92ms step:628/3200 train_loss:3.8876 train_time:87704ms step_avg:141.92ms step:629/3200 train_loss:3.9311 train_time:87843ms step_avg:141.91ms step:630/3200 train_loss:3.7585 train_time:87982ms step_avg:141.91ms step:631/3200 train_loss:3.8828 train_time:88121ms step_avg:141.90ms step:632/3200 train_loss:3.9168 train_time:88259ms step_avg:141.90ms step:633/3200 train_loss:3.8208 train_time:88401ms step_avg:141.90ms step:634/3200 train_loss:3.7523 train_time:88544ms step_avg:141.90ms step:635/3200 train_loss:3.8553 train_time:88686ms step_avg:141.90ms step:636/3200 train_loss:4.1068 train_time:88827ms step_avg:141.90ms step:637/3200 train_loss:3.6996 train_time:88967ms step_avg:141.89ms step:638/3200 train_loss:3.5278 train_time:89106ms step_avg:141.89ms step:639/3200 train_loss:3.7485 train_time:89247ms step_avg:141.89ms step:640/3200 train_loss:3.7774 train_time:89388ms step_avg:141.89ms step:641/3200 train_loss:3.7399 train_time:89530ms step_avg:141.89ms step:642/3200 train_loss:3.7449 train_time:89672ms step_avg:141.89ms step:643/3200 train_loss:3.7850 train_time:89811ms step_avg:141.88ms step:644/3200 train_loss:3.7916 train_time:89952ms step_avg:141.88ms step:645/3200 train_loss:3.7242 train_time:90093ms step_avg:141.88ms step:646/3200 train_loss:3.9403 train_time:90233ms step_avg:141.88ms step:647/3200 train_loss:3.8424 train_time:90373ms step_avg:141.87ms step:648/3200 train_loss:3.8354 train_time:90514ms step_avg:141.87ms step:649/3200 train_loss:3.8643 train_time:90655ms step_avg:141.87ms step:650/3200 train_loss:3.9321 train_time:90795ms step_avg:141.87ms step:651/3200 train_loss:3.7928 train_time:90935ms step_avg:141.86ms step:652/3200 train_loss:3.9196 train_time:91074ms step_avg:141.86ms step:653/3200 train_loss:3.7499 train_time:91216ms step_avg:141.86ms step:654/3200 train_loss:3.8290 train_time:91356ms step_avg:141.86ms step:655/3200 train_loss:3.5981 train_time:91498ms step_avg:141.86ms step:656/3200 train_loss:3.7378 train_time:91637ms step_avg:141.85ms step:657/3200 train_loss:3.7457 train_time:91777ms step_avg:141.85ms step:658/3200 train_loss:3.6752 train_time:91917ms step_avg:141.85ms step:659/3200 train_loss:3.8529 train_time:92056ms step_avg:141.84ms step:660/3200 train_loss:3.7546 train_time:92196ms step_avg:141.84ms step:661/3200 train_loss:3.8482 train_time:92335ms step_avg:141.84ms step:662/3200 train_loss:3.9216 train_time:92476ms step_avg:141.83ms step:663/3200 train_loss:3.8368 train_time:92616ms step_avg:141.83ms step:664/3200 train_loss:3.7115 train_time:92756ms step_avg:141.83ms step:665/3200 train_loss:3.7943 train_time:92896ms step_avg:141.83ms step:666/3200 train_loss:3.6649 train_time:93036ms step_avg:141.82ms step:667/3200 train_loss:3.9481 train_time:93175ms step_avg:141.82ms step:668/3200 train_loss:3.7843 train_time:93315ms step_avg:141.82ms step:669/3200 train_loss:3.7869 train_time:93456ms step_avg:141.81ms step:670/3200 train_loss:3.6474 train_time:93595ms step_avg:141.81ms step:671/3200 train_loss:3.7659 train_time:93736ms step_avg:141.81ms step:672/3200 train_loss:3.7176 train_time:93877ms step_avg:141.81ms step:673/3200 train_loss:3.7439 train_time:94016ms step_avg:141.80ms step:674/3200 train_loss:4.0251 train_time:94156ms step_avg:141.80ms step:675/3200 train_loss:3.8067 train_time:94296ms step_avg:141.80ms step:676/3200 train_loss:3.8775 train_time:94436ms step_avg:141.80ms step:677/3200 train_loss:3.6596 train_time:94575ms step_avg:141.79ms step:678/3200 train_loss:3.7636 train_time:94715ms step_avg:141.79ms step:679/3200 train_loss:3.7135 train_time:94856ms step_avg:141.79ms step:680/3200 train_loss:3.8519 train_time:94996ms step_avg:141.78ms step:681/3200 train_loss:3.7474 train_time:95136ms step_avg:141.78ms step:682/3200 train_loss:3.7788 train_time:95276ms step_avg:141.78ms step:683/3200 train_loss:3.8578 train_time:95418ms step_avg:141.78ms step:684/3200 train_loss:3.8999 train_time:95555ms step_avg:141.77ms step:685/3200 train_loss:3.7983 train_time:95696ms step_avg:141.77ms step:686/3200 train_loss:3.8731 train_time:95836ms step_avg:141.77ms step:687/3200 train_loss:3.8033 train_time:95976ms step_avg:141.77ms step:688/3200 train_loss:3.8425 train_time:96117ms step_avg:141.76ms step:689/3200 train_loss:3.4716 train_time:96257ms step_avg:141.76ms step:690/3200 train_loss:3.5908 train_time:96397ms step_avg:141.76ms step:691/3200 train_loss:3.7232 train_time:96537ms step_avg:141.76ms step:692/3200 train_loss:3.6073 train_time:96677ms step_avg:141.75ms step:693/3200 train_loss:3.8165 train_time:96817ms step_avg:141.75ms step:694/3200 train_loss:3.8269 train_time:96956ms step_avg:141.75ms step:695/3200 train_loss:3.7136 train_time:97096ms step_avg:141.75ms step:696/3200 train_loss:3.7092 train_time:97237ms step_avg:141.74ms step:697/3200 train_loss:4.0220 train_time:97376ms step_avg:141.74ms step:698/3200 train_loss:3.7711 train_time:97516ms step_avg:141.74ms step:699/3200 train_loss:3.8141 train_time:97656ms step_avg:141.74ms step:700/3200 train_loss:3.9688 train_time:97797ms step_avg:141.73ms step:701/3200 train_loss:3.7398 train_time:97937ms step_avg:141.73ms step:702/3200 train_loss:3.7022 train_time:98077ms step_avg:141.73ms step:703/3200 train_loss:3.6883 train_time:98217ms step_avg:141.73ms step:704/3200 train_loss:3.6489 train_time:98356ms step_avg:141.72ms step:705/3200 train_loss:3.7343 train_time:98495ms step_avg:141.72ms step:706/3200 train_loss:3.7266 train_time:98636ms step_avg:141.72ms step:707/3200 train_loss:3.7416 train_time:98776ms step_avg:141.72ms step:708/3200 train_loss:3.8093 train_time:98915ms step_avg:141.71ms step:709/3200 train_loss:3.7620 train_time:99055ms step_avg:141.71ms step:710/3200 train_loss:3.7391 train_time:99196ms step_avg:141.71ms step:711/3200 train_loss:3.7081 train_time:99337ms step_avg:141.71ms step:712/3200 train_loss:3.7520 train_time:99475ms step_avg:141.70ms step:713/3200 train_loss:3.8119 train_time:99614ms step_avg:141.70ms step:714/3200 train_loss:3.8264 train_time:99754ms step_avg:141.70ms step:715/3200 train_loss:3.7386 train_time:99894ms step_avg:141.69ms step:716/3200 train_loss:3.7352 train_time:100036ms step_avg:141.69ms step:717/3200 train_loss:3.7597 train_time:100175ms step_avg:141.69ms step:718/3200 train_loss:3.9068 train_time:100315ms step_avg:141.69ms step:719/3200 train_loss:3.7572 train_time:100457ms step_avg:141.69ms step:720/3200 train_loss:3.8357 train_time:100594ms step_avg:141.68ms step:721/3200 train_loss:4.0017 train_time:100734ms step_avg:141.68ms step:722/3200 train_loss:3.6245 train_time:100877ms step_avg:141.68ms step:723/3200 train_loss:3.8916 train_time:101017ms step_avg:141.68ms step:724/3200 train_loss:3.9408 train_time:101157ms step_avg:141.68ms step:725/3200 train_loss:3.7273 train_time:101292ms step_avg:141.67ms step:726/3200 train_loss:3.8095 train_time:101433ms step_avg:141.67ms step:727/3200 train_loss:3.7070 train_time:101576ms step_avg:141.67ms step:728/3200 train_loss:3.7282 train_time:101714ms step_avg:141.66ms step:729/3200 train_loss:3.9040 train_time:101854ms step_avg:141.66ms step:730/3200 train_loss:3.8464 train_time:101994ms step_avg:141.66ms step:731/3200 train_loss:3.8443 train_time:102134ms step_avg:141.66ms step:732/3200 train_loss:3.7346 train_time:102277ms step_avg:141.66ms step:733/3200 train_loss:3.7596 train_time:102414ms step_avg:141.65ms step:734/3200 train_loss:3.9930 train_time:102554ms step_avg:141.65ms step:735/3200 train_loss:3.7271 train_time:102694ms step_avg:141.65ms step:736/3200 train_loss:3.7912 train_time:102834ms step_avg:141.64ms step:737/3200 train_loss:3.9064 train_time:102974ms step_avg:141.64ms step:738/3200 train_loss:3.8273 train_time:103115ms step_avg:141.64ms step:739/3200 train_loss:3.7732 train_time:103255ms step_avg:141.64ms step:740/3200 train_loss:3.6666 train_time:103394ms step_avg:141.64ms step:741/3200 train_loss:4.2999 train_time:103534ms step_avg:141.63ms step:742/3200 train_loss:3.6727 train_time:103674ms step_avg:141.63ms step:743/3200 train_loss:3.7521 train_time:103814ms step_avg:141.63ms step:744/3200 train_loss:3.7498 train_time:103954ms step_avg:141.63ms step:745/3200 train_loss:3.8120 train_time:104094ms step_avg:141.62ms step:746/3200 train_loss:3.7962 train_time:104234ms step_avg:141.62ms step:747/3200 train_loss:3.7677 train_time:104374ms step_avg:141.62ms step:748/3200 train_loss:3.7946 train_time:104516ms step_avg:141.62ms step:749/3200 train_loss:3.7263 train_time:104657ms step_avg:141.62ms step:750/3200 train_loss:3.7319 train_time:104794ms step_avg:141.61ms step:750/3200 val_loss:3.7407 train_time:104851ms step_avg:141.69ms step:751/3200 train_loss:3.7637 train_time:104947ms step_avg:141.63ms step:752/3200 train_loss:3.7260 train_time:105092ms step_avg:141.63ms step:753/3200 train_loss:3.7693 train_time:105234ms step_avg:141.63ms step:754/3200 train_loss:3.7886 train_time:105372ms step_avg:141.63ms step:755/3200 train_loss:3.7600 train_time:105511ms step_avg:141.62ms step:756/3200 train_loss:3.8373 train_time:105814ms step_avg:141.84ms step:757/3200 train_loss:3.6607 train_time:105952ms step_avg:141.84ms step:758/3200 train_loss:3.8913 train_time:106091ms step_avg:141.83ms step:759/3200 train_loss:3.8143 train_time:106230ms step_avg:141.83ms step:760/3200 train_loss:3.7498 train_time:106543ms step_avg:142.06ms step:761/3200 train_loss:3.8605 train_time:106680ms step_avg:142.05ms step:762/3200 train_loss:3.5700 train_time:106819ms step_avg:142.05ms step:763/3200 train_loss:3.7230 train_time:106957ms step_avg:142.04ms step:764/3200 train_loss:3.8352 train_time:107096ms step_avg:142.04ms step:765/3200 train_loss:3.4895 train_time:107235ms step_avg:142.03ms step:766/3200 train_loss:3.9115 train_time:107375ms step_avg:142.03ms step:767/3200 train_loss:3.7624 train_time:107523ms step_avg:142.04ms step:768/3200 train_loss:3.7244 train_time:107664ms step_avg:142.04ms step:769/3200 train_loss:3.7462 train_time:107803ms step_avg:142.03ms step:770/3200 train_loss:3.7680 train_time:107943ms step_avg:142.03ms step:771/3200 train_loss:3.8249 train_time:108083ms step_avg:142.03ms step:772/3200 train_loss:4.0516 train_time:108221ms step_avg:142.02ms step:773/3200 train_loss:3.6330 train_time:108361ms step_avg:142.02ms step:774/3200 train_loss:3.8217 train_time:108506ms step_avg:142.02ms step:775/3200 train_loss:3.8050 train_time:108647ms step_avg:142.02ms step:776/3200 train_loss:3.7741 train_time:108787ms step_avg:142.02ms step:777/3200 train_loss:3.5841 train_time:108927ms step_avg:142.02ms step:778/3200 train_loss:3.5704 train_time:109067ms step_avg:142.01ms step:779/3200 train_loss:3.6422 train_time:109206ms step_avg:142.01ms step:780/3200 train_loss:3.7331 train_time:109347ms step_avg:142.01ms step:781/3200 train_loss:3.7675 train_time:109487ms step_avg:142.01ms step:782/3200 train_loss:3.8330 train_time:109627ms step_avg:142.00ms step:783/3200 train_loss:3.7405 train_time:109768ms step_avg:142.00ms step:784/3200 train_loss:3.7391 train_time:109906ms step_avg:142.00ms step:785/3200 train_loss:3.7481 train_time:110046ms step_avg:141.99ms step:786/3200 train_loss:3.7208 train_time:110186ms step_avg:141.99ms step:787/3200 train_loss:3.6236 train_time:110326ms step_avg:141.99ms step:788/3200 train_loss:3.8855 train_time:110465ms step_avg:141.99ms step:789/3200 train_loss:3.6698 train_time:110606ms step_avg:141.98ms step:790/3200 train_loss:3.7304 train_time:110747ms step_avg:141.98ms step:791/3200 train_loss:3.7897 train_time:110887ms step_avg:141.98ms step:792/3200 train_loss:3.9281 train_time:111027ms step_avg:141.98ms step:793/3200 train_loss:3.9373 train_time:111166ms step_avg:141.97ms step:794/3200 train_loss:3.6499 train_time:111307ms step_avg:141.97ms step:795/3200 train_loss:3.7668 train_time:111447ms step_avg:141.97ms step:796/3200 train_loss:3.8289 train_time:111587ms step_avg:141.97ms step:797/3200 train_loss:3.9462 train_time:111728ms step_avg:141.97ms step:798/3200 train_loss:3.6828 train_time:111869ms step_avg:141.97ms step:799/3200 train_loss:3.8276 train_time:112008ms step_avg:141.96ms step:800/3200 train_loss:3.7237 train_time:112149ms step_avg:141.96ms step:801/3200 train_loss:3.7137 train_time:112289ms step_avg:141.96ms step:802/3200 train_loss:3.7977 train_time:112429ms step_avg:141.96ms step:803/3200 train_loss:3.6624 train_time:112568ms step_avg:141.95ms step:804/3200 train_loss:3.6927 train_time:112709ms step_avg:141.95ms step:805/3200 train_loss:3.8039 train_time:112849ms step_avg:141.95ms step:806/3200 train_loss:3.7043 train_time:112989ms step_avg:141.95ms step:807/3200 train_loss:3.7117 train_time:113129ms step_avg:141.94ms step:808/3200 train_loss:3.8080 train_time:113270ms step_avg:141.94ms step:809/3200 train_loss:3.7372 train_time:113409ms step_avg:141.94ms step:810/3200 train_loss:3.6581 train_time:113549ms step_avg:141.94ms step:811/3200 train_loss:3.7343 train_time:113689ms step_avg:141.93ms step:812/3200 train_loss:3.7724 train_time:113830ms step_avg:141.93ms step:813/3200 train_loss:3.7675 train_time:113973ms step_avg:141.93ms step:814/3200 train_loss:3.7987 train_time:114110ms step_avg:141.93ms step:815/3200 train_loss:3.7389 train_time:114250ms step_avg:141.93ms step:816/3200 train_loss:3.7319 train_time:114390ms step_avg:141.92ms step:817/3200 train_loss:3.8347 train_time:114531ms step_avg:141.92ms step:818/3200 train_loss:3.9247 train_time:114674ms step_avg:141.92ms step:819/3200 train_loss:3.7000 train_time:114810ms step_avg:141.92ms step:820/3200 train_loss:3.8934 train_time:114950ms step_avg:141.91ms step:821/3200 train_loss:3.6788 train_time:115090ms step_avg:141.91ms step:822/3200 train_loss:3.7154 train_time:115231ms step_avg:141.91ms step:823/3200 train_loss:3.8360 train_time:115372ms step_avg:141.91ms step:824/3200 train_loss:3.7526 train_time:115512ms step_avg:141.91ms step:825/3200 train_loss:3.6832 train_time:115653ms step_avg:141.91ms step:826/3200 train_loss:3.7884 train_time:115795ms step_avg:141.91ms step:827/3200 train_loss:3.6706 train_time:115936ms step_avg:141.90ms step:828/3200 train_loss:3.9054 train_time:116077ms step_avg:141.90ms step:829/3200 train_loss:3.7929 train_time:116217ms step_avg:141.90ms step:830/3200 train_loss:3.8483 train_time:116358ms step_avg:141.90ms step:831/3200 train_loss:3.7070 train_time:116498ms step_avg:141.90ms step:832/3200 train_loss:3.7557 train_time:116639ms step_avg:141.90ms step:833/3200 train_loss:3.6823 train_time:116781ms step_avg:141.90ms step:834/3200 train_loss:3.8115 train_time:116921ms step_avg:141.89ms step:835/3200 train_loss:3.6552 train_time:117061ms step_avg:141.89ms step:836/3200 train_loss:3.6265 train_time:117202ms step_avg:141.89ms step:837/3200 train_loss:3.8899 train_time:117342ms step_avg:141.89ms step:838/3200 train_loss:3.5874 train_time:117482ms step_avg:141.89ms step:839/3200 train_loss:3.7555 train_time:117622ms step_avg:141.88ms step:840/3200 train_loss:3.5975 train_time:117762ms step_avg:141.88ms step:841/3200 train_loss:3.6413 train_time:117907ms step_avg:141.89ms step:842/3200 train_loss:3.7354 train_time:118043ms step_avg:141.88ms step:843/3200 train_loss:3.7477 train_time:118183ms step_avg:141.88ms step:844/3200 train_loss:3.7498 train_time:118323ms step_avg:141.87ms step:845/3200 train_loss:3.5978 train_time:118463ms step_avg:141.87ms step:846/3200 train_loss:3.8353 train_time:118603ms step_avg:141.87ms step:847/3200 train_loss:3.7005 train_time:118743ms step_avg:141.87ms step:848/3200 train_loss:3.6576 train_time:118882ms step_avg:141.86ms step:849/3200 train_loss:3.7932 train_time:119023ms step_avg:141.86ms step:850/3200 train_loss:3.6610 train_time:119162ms step_avg:141.86ms step:851/3200 train_loss:3.6159 train_time:119303ms step_avg:141.86ms step:852/3200 train_loss:3.9077 train_time:119445ms step_avg:141.86ms step:853/3200 train_loss:3.6209 train_time:119584ms step_avg:141.85ms step:854/3200 train_loss:3.7390 train_time:119723ms step_avg:141.85ms step:855/3200 train_loss:3.8111 train_time:119862ms step_avg:141.85ms step:856/3200 train_loss:3.6948 train_time:120003ms step_avg:141.85ms step:857/3200 train_loss:3.7156 train_time:120143ms step_avg:141.85ms step:858/3200 train_loss:3.7700 train_time:120283ms step_avg:141.84ms step:859/3200 train_loss:3.6567 train_time:120424ms step_avg:141.84ms step:860/3200 train_loss:3.7389 train_time:120563ms step_avg:141.84ms step:861/3200 train_loss:3.7632 train_time:120703ms step_avg:141.84ms step:862/3200 train_loss:3.8086 train_time:120843ms step_avg:141.83ms step:863/3200 train_loss:3.7570 train_time:120985ms step_avg:141.83ms step:864/3200 train_loss:3.7409 train_time:121124ms step_avg:141.83ms step:865/3200 train_loss:3.5673 train_time:121266ms step_avg:141.83ms step:866/3200 train_loss:3.7563 train_time:121403ms step_avg:141.83ms step:867/3200 train_loss:4.0282 train_time:121543ms step_avg:141.82ms step:868/3200 train_loss:3.6186 train_time:121683ms step_avg:141.82ms step:869/3200 train_loss:3.7998 train_time:121823ms step_avg:141.82ms step:870/3200 train_loss:3.7818 train_time:121962ms step_avg:141.82ms step:871/3200 train_loss:3.6220 train_time:122106ms step_avg:141.82ms step:872/3200 train_loss:3.5999 train_time:122242ms step_avg:141.81ms step:873/3200 train_loss:3.8327 train_time:122383ms step_avg:141.81ms step:874/3200 train_loss:3.6229 train_time:122523ms step_avg:141.81ms step:875/3200 train_loss:3.3489 train_time:122662ms step_avg:141.81ms step:875/3200 val_loss:3.6960 train_time:122718ms step_avg:141.87ms step:876/3200 train_loss:3.8184 train_time:122815ms step_avg:141.82ms step:877/3200 train_loss:3.6239 train_time:122961ms step_avg:141.82ms step:878/3200 train_loss:3.7891 train_time:123100ms step_avg:141.82ms step:879/3200 train_loss:3.6499 train_time:123239ms step_avg:141.82ms step:880/3200 train_loss:3.8312 train_time:123378ms step_avg:141.81ms step:881/3200 train_loss:3.4947 train_time:123518ms step_avg:141.81ms step:882/3200 train_loss:3.6682 train_time:123656ms step_avg:141.81ms step:883/3200 train_loss:3.8601 train_time:123798ms step_avg:141.81ms step:884/3200 train_loss:4.0112 train_time:123940ms step_avg:141.81ms step:885/3200 train_loss:3.7406 train_time:124082ms step_avg:141.81ms step:886/3200 train_loss:3.6532 train_time:124222ms step_avg:141.81ms step:887/3200 train_loss:3.7468 train_time:124362ms step_avg:141.80ms step:888/3200 train_loss:4.2531 train_time:124505ms step_avg:141.81ms step:889/3200 train_loss:4.0150 train_time:124642ms step_avg:141.80ms step:890/3200 train_loss:3.6924 train_time:124784ms step_avg:141.80ms step:891/3200 train_loss:3.7057 train_time:124925ms step_avg:141.80ms step:892/3200 train_loss:3.5343 train_time:125067ms step_avg:141.80ms step:893/3200 train_loss:3.8782 train_time:125210ms step_avg:141.80ms step:894/3200 train_loss:3.6027 train_time:125347ms step_avg:141.79ms step:895/3200 train_loss:3.8651 train_time:125487ms step_avg:141.79ms step:896/3200 train_loss:3.8606 train_time:125628ms step_avg:141.79ms step:897/3200 train_loss:3.6671 train_time:125769ms step_avg:141.79ms step:898/3200 train_loss:3.7075 train_time:125909ms step_avg:141.79ms step:899/3200 train_loss:3.7575 train_time:126050ms step_avg:141.79ms step:900/3200 train_loss:3.6475 train_time:126190ms step_avg:141.79ms step:901/3200 train_loss:3.5952 train_time:126331ms step_avg:141.79ms step:902/3200 train_loss:3.7986 train_time:126470ms step_avg:141.78ms step:903/3200 train_loss:3.8025 train_time:126610ms step_avg:141.78ms step:904/3200 train_loss:3.7075 train_time:126750ms step_avg:141.78ms step:905/3200 train_loss:3.6754 train_time:126890ms step_avg:141.78ms step:906/3200 train_loss:3.6657 train_time:127031ms step_avg:141.78ms step:907/3200 train_loss:3.8946 train_time:127170ms step_avg:141.77ms step:908/3200 train_loss:3.6839 train_time:127310ms step_avg:141.77ms step:909/3200 train_loss:3.7214 train_time:127450ms step_avg:141.77ms step:910/3200 train_loss:3.6292 train_time:127591ms step_avg:141.77ms step:911/3200 train_loss:3.7210 train_time:127731ms step_avg:141.77ms step:912/3200 train_loss:3.7856 train_time:127872ms step_avg:141.76ms step:913/3200 train_loss:3.7792 train_time:128012ms step_avg:141.76ms step:914/3200 train_loss:3.6599 train_time:128152ms step_avg:141.76ms step:915/3200 train_loss:3.9034 train_time:128294ms step_avg:141.76ms step:916/3200 train_loss:3.6998 train_time:128434ms step_avg:141.76ms step:917/3200 train_loss:3.7943 train_time:128571ms step_avg:141.75ms step:918/3200 train_loss:3.7658 train_time:128712ms step_avg:141.75ms step:919/3200 train_loss:4.9831 train_time:128852ms step_avg:141.75ms step:920/3200 train_loss:3.6927 train_time:128992ms step_avg:141.75ms step:921/3200 train_loss:3.7345 train_time:129132ms step_avg:141.75ms step:922/3200 train_loss:3.6965 train_time:129271ms step_avg:141.74ms step:923/3200 train_loss:3.7566 train_time:129411ms step_avg:141.74ms step:924/3200 train_loss:3.7629 train_time:129551ms step_avg:141.74ms step:925/3200 train_loss:3.8469 train_time:129692ms step_avg:141.74ms step:926/3200 train_loss:3.8272 train_time:129832ms step_avg:141.74ms step:927/3200 train_loss:3.7238 train_time:129971ms step_avg:141.74ms step:928/3200 train_loss:3.7135 train_time:130112ms step_avg:141.73ms step:929/3200 train_loss:3.9384 train_time:130251ms step_avg:141.73ms step:930/3200 train_loss:3.7856 train_time:130392ms step_avg:141.73ms step:931/3200 train_loss:3.5695 train_time:130531ms step_avg:141.73ms step:932/3200 train_loss:3.6632 train_time:130671ms step_avg:141.73ms step:933/3200 train_loss:3.8497 train_time:130812ms step_avg:141.72ms step:934/3200 train_loss:3.5743 train_time:130951ms step_avg:141.72ms step:935/3200 train_loss:3.7469 train_time:131091ms step_avg:141.72ms step:936/3200 train_loss:3.6187 train_time:131231ms step_avg:141.72ms step:937/3200 train_loss:3.6858 train_time:131371ms step_avg:141.72ms step:938/3200 train_loss:3.7773 train_time:131511ms step_avg:141.71ms step:939/3200 train_loss:3.7039 train_time:131650ms step_avg:141.71ms step:940/3200 train_loss:3.8775 train_time:131791ms step_avg:141.71ms step:941/3200 train_loss:3.6525 train_time:131931ms step_avg:141.71ms step:942/3200 train_loss:3.7138 train_time:132071ms step_avg:141.71ms step:943/3200 train_loss:3.5221 train_time:132211ms step_avg:141.71ms step:944/3200 train_loss:3.8655 train_time:132352ms step_avg:141.70ms step:945/3200 train_loss:3.5823 train_time:132667ms step_avg:141.89ms step:946/3200 train_loss:3.5974 train_time:132797ms step_avg:141.88ms step:947/3200 train_loss:5.2236 train_time:132936ms step_avg:141.87ms step:948/3200 train_loss:3.7660 train_time:133075ms step_avg:141.87ms step:949/3200 train_loss:3.6603 train_time:133214ms step_avg:141.87ms step:950/3200 train_loss:3.5684 train_time:133524ms step_avg:142.05ms step:951/3200 train_loss:3.6180 train_time:133661ms step_avg:142.04ms step:952/3200 train_loss:3.5774 train_time:133802ms step_avg:142.04ms step:953/3200 train_loss:3.6541 train_time:133941ms step_avg:142.04ms step:954/3200 train_loss:3.7268 train_time:134081ms step_avg:142.03ms step:955/3200 train_loss:3.6086 train_time:134220ms step_avg:142.03ms step:956/3200 train_loss:3.6444 train_time:134362ms step_avg:142.03ms step:957/3200 train_loss:3.6129 train_time:134506ms step_avg:142.03ms step:958/3200 train_loss:3.6743 train_time:134648ms step_avg:142.03ms step:959/3200 train_loss:3.6689 train_time:134788ms step_avg:142.03ms step:960/3200 train_loss:3.6805 train_time:134928ms step_avg:142.03ms step:961/3200 train_loss:3.5626 train_time:135068ms step_avg:142.03ms step:962/3200 train_loss:3.8256 train_time:135208ms step_avg:142.03ms step:963/3200 train_loss:3.7762 train_time:135347ms step_avg:142.02ms step:964/3200 train_loss:3.7220 train_time:135490ms step_avg:142.02ms step:965/3200 train_loss:3.6272 train_time:135637ms step_avg:142.03ms step:966/3200 train_loss:3.6570 train_time:135772ms step_avg:142.02ms step:967/3200 train_loss:3.8742 train_time:135912ms step_avg:142.02ms step:968/3200 train_loss:3.7002 train_time:136052ms step_avg:142.02ms step:969/3200 train_loss:3.6900 train_time:136193ms step_avg:142.02ms step:970/3200 train_loss:3.7511 train_time:136332ms step_avg:142.01ms step:971/3200 train_loss:3.5662 train_time:136472ms step_avg:142.01ms step:972/3200 train_loss:3.7114 train_time:136612ms step_avg:142.01ms step:973/3200 train_loss:3.6630 train_time:136752ms step_avg:142.01ms step:974/3200 train_loss:3.7081 train_time:136891ms step_avg:142.00ms step:975/3200 train_loss:3.7862 train_time:137031ms step_avg:142.00ms step:976/3200 train_loss:3.6539 train_time:137171ms step_avg:142.00ms step:977/3200 train_loss:3.8519 train_time:137311ms step_avg:142.00ms step:978/3200 train_loss:3.7397 train_time:137451ms step_avg:141.99ms step:979/3200 train_loss:3.5717 train_time:137594ms step_avg:142.00ms step:980/3200 train_loss:3.8604 train_time:137731ms step_avg:141.99ms step:981/3200 train_loss:3.5887 train_time:137872ms step_avg:141.99ms step:982/3200 train_loss:3.7570 train_time:138011ms step_avg:141.99ms step:983/3200 train_loss:3.7333 train_time:138150ms step_avg:141.98ms step:984/3200 train_loss:3.7513 train_time:138291ms step_avg:141.98ms step:985/3200 train_loss:3.6778 train_time:138431ms step_avg:141.98ms step:986/3200 train_loss:3.7669 train_time:138572ms step_avg:141.98ms step:987/3200 train_loss:3.5859 train_time:138717ms step_avg:141.98ms step:988/3200 train_loss:3.6645 train_time:138852ms step_avg:141.98ms step:989/3200 train_loss:3.6641 train_time:138992ms step_avg:141.97ms step:990/3200 train_loss:3.6018 train_time:139132ms step_avg:141.97ms step:991/3200 train_loss:3.8161 train_time:139271ms step_avg:141.97ms step:992/3200 train_loss:3.6426 train_time:139411ms step_avg:141.97ms step:993/3200 train_loss:3.6106 train_time:139551ms step_avg:141.96ms step:994/3200 train_loss:3.6913 train_time:139691ms step_avg:141.96ms step:995/3200 train_loss:3.7700 train_time:139834ms step_avg:141.96ms step:996/3200 train_loss:3.7169 train_time:139972ms step_avg:141.96ms step:997/3200 train_loss:3.6264 train_time:140112ms step_avg:141.96ms step:998/3200 train_loss:3.9771 train_time:140251ms step_avg:141.95ms step:999/3200 train_loss:3.6362 train_time:140392ms step_avg:141.95ms step:1000/3200 train_loss:3.7562 train_time:140532ms step_avg:141.95ms step:1000/3200 val_loss:3.6581 train_time:140588ms step_avg:142.01ms step:1001/3200 train_loss:3.6304 train_time:140683ms step_avg:141.96ms step:1002/3200 train_loss:3.6822 train_time:140829ms step_avg:141.96ms step:1003/3200 train_loss:3.5633 train_time:140970ms step_avg:141.96ms step:1004/3200 train_loss:3.7524 train_time:141110ms step_avg:141.96ms step:1005/3200 train_loss:3.7997 train_time:141249ms step_avg:141.96ms step:1006/3200 train_loss:3.5722 train_time:141388ms step_avg:141.96ms step:1007/3200 train_loss:3.6564 train_time:141527ms step_avg:141.95ms step:1008/3200 train_loss:3.6189 train_time:141668ms step_avg:141.95ms step:1009/3200 train_loss:3.7384 train_time:141811ms step_avg:141.95ms step:1010/3200 train_loss:3.8378 train_time:141954ms step_avg:141.95ms step:1011/3200 train_loss:3.7425 train_time:142092ms step_avg:141.95ms step:1012/3200 train_loss:3.7054 train_time:142232ms step_avg:141.95ms step:1013/3200 train_loss:3.5648 train_time:142372ms step_avg:141.95ms step:1014/3200 train_loss:3.7098 train_time:142512ms step_avg:141.94ms step:1015/3200 train_loss:3.8192 train_time:142656ms step_avg:141.95ms step:1016/3200 train_loss:3.5306 train_time:142795ms step_avg:141.94ms step:1017/3200 train_loss:3.6164 train_time:142936ms step_avg:141.94ms step:1018/3200 train_loss:3.6198 train_time:143077ms step_avg:141.94ms step:1019/3200 train_loss:3.5689 train_time:143216ms step_avg:141.94ms step:1020/3200 train_loss:3.7065 train_time:143356ms step_avg:141.94ms step:1021/3200 train_loss:3.6176 train_time:143498ms step_avg:141.94ms step:1022/3200 train_loss:3.5531 train_time:143637ms step_avg:141.93ms step:1023/3200 train_loss:3.6573 train_time:143780ms step_avg:141.93ms step:1024/3200 train_loss:3.6877 train_time:143919ms step_avg:141.93ms step:1025/3200 train_loss:3.6626 train_time:144059ms step_avg:141.93ms step:1026/3200 train_loss:3.6768 train_time:144211ms step_avg:141.94ms step:1027/3200 train_loss:3.8363 train_time:144338ms step_avg:141.93ms step:1028/3200 train_loss:3.5209 train_time:144482ms step_avg:141.93ms step:1029/3200 train_loss:3.5811 train_time:144620ms step_avg:141.92ms step:1030/3200 train_loss:3.5360 train_time:144761ms step_avg:141.92ms step:1031/3200 train_loss:3.7011 train_time:144902ms step_avg:141.92ms step:1032/3200 train_loss:3.6868 train_time:145041ms step_avg:141.92ms step:1033/3200 train_loss:3.8695 train_time:145182ms step_avg:141.92ms step:1034/3200 train_loss:3.6828 train_time:145322ms step_avg:141.92ms step:1035/3200 train_loss:3.6111 train_time:145461ms step_avg:141.91ms step:1036/3200 train_loss:3.6264 train_time:145601ms step_avg:141.91ms step:1037/3200 train_loss:3.6832 train_time:145740ms step_avg:141.91ms step:1038/3200 train_loss:3.9919 train_time:145881ms step_avg:141.91ms step:1039/3200 train_loss:3.8103 train_time:146023ms step_avg:141.91ms step:1040/3200 train_loss:3.7057 train_time:146161ms step_avg:141.90ms step:1041/3200 train_loss:3.5971 train_time:146300ms step_avg:141.90ms step:1042/3200 train_loss:3.6695 train_time:146438ms step_avg:141.90ms step:1043/3200 train_loss:3.7074 train_time:146579ms step_avg:141.90ms step:1044/3200 train_loss:3.6421 train_time:146723ms step_avg:141.90ms step:1045/3200 train_loss:3.6488 train_time:146859ms step_avg:141.89ms step:1046/3200 train_loss:3.7282 train_time:147000ms step_avg:141.89ms step:1047/3200 train_loss:3.6227 train_time:147139ms step_avg:141.89ms step:1048/3200 train_loss:3.8343 train_time:147278ms step_avg:141.89ms step:1049/3200 train_loss:3.6947 train_time:147419ms step_avg:141.89ms step:1050/3200 train_loss:3.6160 train_time:147559ms step_avg:141.88ms step:1051/3200 train_loss:3.5814 train_time:147700ms step_avg:141.88ms step:1052/3200 train_loss:3.7012 train_time:147839ms step_avg:141.88ms step:1053/3200 train_loss:3.5785 train_time:147979ms step_avg:141.88ms step:1054/3200 train_loss:3.8977 train_time:148120ms step_avg:141.88ms step:1055/3200 train_loss:3.7334 train_time:148259ms step_avg:141.87ms step:1056/3200 train_loss:3.5907 train_time:148399ms step_avg:141.87ms step:1057/3200 train_loss:3.7009 train_time:148538ms step_avg:141.87ms step:1058/3200 train_loss:3.7712 train_time:148678ms step_avg:141.87ms step:1059/3200 train_loss:3.4926 train_time:148819ms step_avg:141.87ms step:1060/3200 train_loss:3.6218 train_time:148960ms step_avg:141.87ms step:1061/3200 train_loss:3.6491 train_time:149099ms step_avg:141.86ms step:1062/3200 train_loss:3.6059 train_time:149239ms step_avg:141.86ms step:1063/3200 train_loss:3.5850 train_time:149379ms step_avg:141.86ms step:1064/3200 train_loss:3.6840 train_time:149519ms step_avg:141.86ms step:1065/3200 train_loss:3.5832 train_time:149660ms step_avg:141.86ms step:1066/3200 train_loss:3.5724 train_time:149799ms step_avg:141.86ms step:1067/3200 train_loss:3.5926 train_time:149940ms step_avg:141.85ms step:1068/3200 train_loss:3.5107 train_time:150080ms step_avg:141.85ms step:1069/3200 train_loss:3.6225 train_time:150219ms step_avg:141.85ms step:1070/3200 train_loss:3.4942 train_time:150358ms step_avg:141.85ms step:1071/3200 train_loss:3.7439 train_time:150498ms step_avg:141.85ms step:1072/3200 train_loss:3.6984 train_time:150639ms step_avg:141.84ms step:1073/3200 train_loss:3.6463 train_time:150780ms step_avg:141.84ms step:1074/3200 train_loss:3.7157 train_time:150920ms step_avg:141.84ms step:1075/3200 train_loss:3.6571 train_time:151060ms step_avg:141.84ms step:1076/3200 train_loss:3.5951 train_time:151202ms step_avg:141.84ms step:1077/3200 train_loss:3.9881 train_time:151339ms step_avg:141.84ms step:1078/3200 train_loss:3.6704 train_time:151478ms step_avg:141.83ms step:1079/3200 train_loss:3.3521 train_time:151619ms step_avg:141.83ms step:1080/3200 train_loss:3.7210 train_time:151759ms step_avg:141.83ms step:1081/3200 train_loss:3.6493 train_time:151899ms step_avg:141.83ms step:1082/3200 train_loss:3.7106 train_time:152039ms step_avg:141.83ms step:1083/3200 train_loss:3.8100 train_time:152179ms step_avg:141.83ms step:1084/3200 train_loss:3.7040 train_time:152319ms step_avg:141.82ms step:1085/3200 train_loss:3.6837 train_time:152458ms step_avg:141.82ms step:1086/3200 train_loss:3.6460 train_time:152599ms step_avg:141.82ms step:1087/3200 train_loss:3.8374 train_time:152738ms step_avg:141.82ms step:1088/3200 train_loss:3.7312 train_time:152879ms step_avg:141.82ms step:1089/3200 train_loss:3.5613 train_time:153019ms step_avg:141.82ms step:1090/3200 train_loss:3.5803 train_time:153159ms step_avg:141.81ms step:1091/3200 train_loss:3.6964 train_time:153300ms step_avg:141.81ms step:1092/3200 train_loss:3.4988 train_time:153439ms step_avg:141.81ms step:1093/3200 train_loss:3.6920 train_time:153579ms step_avg:141.81ms step:1094/3200 train_loss:3.8248 train_time:153719ms step_avg:141.81ms step:1095/3200 train_loss:3.6696 train_time:153859ms step_avg:141.81ms step:1096/3200 train_loss:3.6155 train_time:153999ms step_avg:141.80ms step:1097/3200 train_loss:3.6401 train_time:154138ms step_avg:141.80ms step:1098/3200 train_loss:3.6871 train_time:154279ms step_avg:141.80ms step:1099/3200 train_loss:3.7609 train_time:154419ms step_avg:141.80ms step:1100/3200 train_loss:3.7192 train_time:154559ms step_avg:141.80ms step:1101/3200 train_loss:3.6549 train_time:154700ms step_avg:141.80ms step:1102/3200 train_loss:3.5037 train_time:154839ms step_avg:141.79ms step:1103/3200 train_loss:3.5668 train_time:154981ms step_avg:141.79ms step:1104/3200 train_loss:3.6544 train_time:155119ms step_avg:141.79ms step:1105/3200 train_loss:3.5367 train_time:155259ms step_avg:141.79ms step:1106/3200 train_loss:4.2920 train_time:155399ms step_avg:141.79ms step:1107/3200 train_loss:3.4384 train_time:155538ms step_avg:141.79ms step:1108/3200 train_loss:3.7766 train_time:155679ms step_avg:141.78ms step:1109/3200 train_loss:3.5669 train_time:155819ms step_avg:141.78ms step:1110/3200 train_loss:3.7101 train_time:155959ms step_avg:141.78ms step:1111/3200 train_loss:3.6373 train_time:156101ms step_avg:141.78ms step:1112/3200 train_loss:3.6839 train_time:156239ms step_avg:141.78ms step:1113/3200 train_loss:3.7735 train_time:156382ms step_avg:141.78ms step:1114/3200 train_loss:3.6328 train_time:156519ms step_avg:141.77ms step:1115/3200 train_loss:3.5714 train_time:156659ms step_avg:141.77ms step:1116/3200 train_loss:3.4763 train_time:156799ms step_avg:141.77ms step:1117/3200 train_loss:3.6488 train_time:156939ms step_avg:141.77ms step:1118/3200 train_loss:3.7950 train_time:157079ms step_avg:141.77ms step:1119/3200 train_loss:3.8385 train_time:157219ms step_avg:141.77ms step:1120/3200 train_loss:3.6715 train_time:157359ms step_avg:141.76ms step:1121/3200 train_loss:3.7002 train_time:157498ms step_avg:141.76ms step:1122/3200 train_loss:3.6042 train_time:157638ms step_avg:141.76ms step:1123/3200 train_loss:3.6637 train_time:157778ms step_avg:141.76ms step:1124/3200 train_loss:3.7986 train_time:157918ms step_avg:141.76ms step:1125/3200 train_loss:3.5652 train_time:158058ms step_avg:141.76ms step:1125/3200 val_loss:3.6303 train_time:158114ms step_avg:141.81ms step:1126/3200 train_loss:3.4666 train_time:158212ms step_avg:141.77ms step:1127/3200 train_loss:3.6910 train_time:158355ms step_avg:141.77ms step:1128/3200 train_loss:3.9126 train_time:158495ms step_avg:141.77ms step:1129/3200 train_loss:3.4419 train_time:158635ms step_avg:141.77ms step:1130/3200 train_loss:3.7657 train_time:158772ms step_avg:141.76ms step:1131/3200 train_loss:3.5987 train_time:158910ms step_avg:141.76ms step:1132/3200 train_loss:3.6255 train_time:159048ms step_avg:141.75ms step:1133/3200 train_loss:3.5827 train_time:159189ms step_avg:141.75ms step:1134/3200 train_loss:3.7457 train_time:159487ms step_avg:141.89ms step:1135/3200 train_loss:3.6708 train_time:159626ms step_avg:141.89ms step:1136/3200 train_loss:3.7241 train_time:159763ms step_avg:141.89ms step:1137/3200 train_loss:3.7614 train_time:159902ms step_avg:141.88ms step:1138/3200 train_loss:3.6779 train_time:160040ms step_avg:141.88ms step:1139/3200 train_loss:3.5809 train_time:160180ms step_avg:141.88ms step:1140/3200 train_loss:3.8724 train_time:160482ms step_avg:142.02ms step:1141/3200 train_loss:3.6792 train_time:160619ms step_avg:142.01ms step:1142/3200 train_loss:3.7888 train_time:160760ms step_avg:142.01ms step:1143/3200 train_loss:3.6670 train_time:160899ms step_avg:142.01ms step:1144/3200 train_loss:3.5829 train_time:161039ms step_avg:142.01ms step:1145/3200 train_loss:3.6890 train_time:161177ms step_avg:142.01ms step:1146/3200 train_loss:3.8067 train_time:161316ms step_avg:142.00ms step:1147/3200 train_loss:3.7776 train_time:161462ms step_avg:142.01ms step:1148/3200 train_loss:3.7029 train_time:161604ms step_avg:142.01ms step:1149/3200 train_loss:3.7131 train_time:161744ms step_avg:142.01ms step:1150/3200 train_loss:3.5658 train_time:161883ms step_avg:142.00ms step:1151/3200 train_loss:3.5842 train_time:162022ms step_avg:142.00ms step:1152/3200 train_loss:3.5509 train_time:162163ms step_avg:142.00ms step:1153/3200 train_loss:3.7091 train_time:162303ms step_avg:142.00ms step:1154/3200 train_loss:3.6678 train_time:162449ms step_avg:142.00ms step:1155/3200 train_loss:3.7303 train_time:162588ms step_avg:142.00ms step:1156/3200 train_loss:3.5829 train_time:162727ms step_avg:142.00ms step:1157/3200 train_loss:3.7485 train_time:162868ms step_avg:141.99ms step:1158/3200 train_loss:3.7087 train_time:163007ms step_avg:141.99ms step:1159/3200 train_loss:3.5169 train_time:163147ms step_avg:141.99ms step:1160/3200 train_loss:3.5544 train_time:163292ms step_avg:141.99ms step:1161/3200 train_loss:3.5436 train_time:163434ms step_avg:141.99ms step:1162/3200 train_loss:3.3657 train_time:163570ms step_avg:141.99ms step:1163/3200 train_loss:3.6650 train_time:163710ms step_avg:141.99ms step:1164/3200 train_loss:3.6269 train_time:163849ms step_avg:141.98ms step:1165/3200 train_loss:3.4967 train_time:163990ms step_avg:141.98ms step:1166/3200 train_loss:3.4958 train_time:164129ms step_avg:141.98ms step:1167/3200 train_loss:3.5975 train_time:164270ms step_avg:141.98ms step:1168/3200 train_loss:3.6063 train_time:164410ms step_avg:141.98ms step:1169/3200 train_loss:3.9230 train_time:164550ms step_avg:141.98ms step:1170/3200 train_loss:3.6082 train_time:164690ms step_avg:141.97ms step:1171/3200 train_loss:3.6253 train_time:164829ms step_avg:141.97ms step:1172/3200 train_loss:3.5366 train_time:164969ms step_avg:141.97ms step:1173/3200 train_loss:3.6225 train_time:165108ms step_avg:141.97ms step:1174/3200 train_loss:3.7580 train_time:165248ms step_avg:141.97ms step:1175/3200 train_loss:3.6028 train_time:165390ms step_avg:141.97ms step:1176/3200 train_loss:3.6189 train_time:165528ms step_avg:141.96ms step:1177/3200 train_loss:3.6687 train_time:165668ms step_avg:141.96ms step:1178/3200 train_loss:3.6594 train_time:165808ms step_avg:141.96ms step:1179/3200 train_loss:3.7085 train_time:165948ms step_avg:141.96ms step:1180/3200 train_loss:3.6252 train_time:166088ms step_avg:141.96ms step:1181/3200 train_loss:3.6243 train_time:166228ms step_avg:141.95ms step:1182/3200 train_loss:3.5721 train_time:166368ms step_avg:141.95ms step:1183/3200 train_loss:3.6283 train_time:166508ms step_avg:141.95ms step:1184/3200 train_loss:3.5489 train_time:166648ms step_avg:141.95ms step:1185/3200 train_loss:3.7181 train_time:166788ms step_avg:141.95ms step:1186/3200 train_loss:3.7864 train_time:166928ms step_avg:141.95ms step:1187/3200 train_loss:3.5787 train_time:167068ms step_avg:141.94ms step:1188/3200 train_loss:3.6375 train_time:167213ms step_avg:141.95ms step:1189/3200 train_loss:3.6583 train_time:167349ms step_avg:141.94ms step:1190/3200 train_loss:3.5029 train_time:167489ms step_avg:141.94ms step:1191/3200 train_loss:3.6774 train_time:167628ms step_avg:141.94ms step:1192/3200 train_loss:3.8179 train_time:167768ms step_avg:141.94ms step:1193/3200 train_loss:3.6119 train_time:167908ms step_avg:141.93ms step:1194/3200 train_loss:3.5107 train_time:168048ms step_avg:141.93ms step:1195/3200 train_loss:3.7913 train_time:168189ms step_avg:141.93ms step:1196/3200 train_loss:3.5988 train_time:168331ms step_avg:141.93ms step:1197/3200 train_loss:3.6063 train_time:168469ms step_avg:141.93ms step:1198/3200 train_loss:3.5087 train_time:168610ms step_avg:141.93ms step:1199/3200 train_loss:3.5165 train_time:168750ms step_avg:141.93ms step:1200/3200 train_loss:3.5689 train_time:168891ms step_avg:141.92ms step:1201/3200 train_loss:3.6518 train_time:169030ms step_avg:141.92ms step:1202/3200 train_loss:3.7263 train_time:169171ms step_avg:141.92ms step:1203/3200 train_loss:3.7578 train_time:169312ms step_avg:141.92ms step:1204/3200 train_loss:3.6392 train_time:169451ms step_avg:141.92ms step:1205/3200 train_loss:3.5655 train_time:169591ms step_avg:141.92ms step:1206/3200 train_loss:3.6530 train_time:169731ms step_avg:141.92ms step:1207/3200 train_loss:3.7033 train_time:169871ms step_avg:141.91ms step:1208/3200 train_loss:3.7475 train_time:170011ms step_avg:141.91ms step:1209/3200 train_loss:3.6220 train_time:170151ms step_avg:141.91ms step:1210/3200 train_loss:3.4915 train_time:170290ms step_avg:141.91ms step:1211/3200 train_loss:3.5303 train_time:170430ms step_avg:141.91ms step:1212/3200 train_loss:3.6281 train_time:170570ms step_avg:141.91ms step:1213/3200 train_loss:3.6479 train_time:170711ms step_avg:141.90ms step:1214/3200 train_loss:3.6768 train_time:170850ms step_avg:141.90ms step:1215/3200 train_loss:3.5693 train_time:170990ms step_avg:141.90ms step:1216/3200 train_loss:3.6258 train_time:171130ms step_avg:141.90ms step:1217/3200 train_loss:3.5725 train_time:171269ms step_avg:141.90ms step:1218/3200 train_loss:3.5678 train_time:171409ms step_avg:141.90ms step:1219/3200 train_loss:3.6543 train_time:171550ms step_avg:141.89ms step:1220/3200 train_loss:3.5027 train_time:171690ms step_avg:141.89ms step:1221/3200 train_loss:3.7207 train_time:171831ms step_avg:141.89ms step:1222/3200 train_loss:3.7453 train_time:171970ms step_avg:141.89ms step:1223/3200 train_loss:3.6835 train_time:172109ms step_avg:141.89ms step:1224/3200 train_loss:3.5265 train_time:172249ms step_avg:141.89ms step:1225/3200 train_loss:3.5144 train_time:172390ms step_avg:141.88ms step:1226/3200 train_loss:3.5986 train_time:172528ms step_avg:141.88ms step:1227/3200 train_loss:3.5836 train_time:172669ms step_avg:141.88ms step:1228/3200 train_loss:3.5211 train_time:172809ms step_avg:141.88ms step:1229/3200 train_loss:3.6872 train_time:172949ms step_avg:141.88ms step:1230/3200 train_loss:3.6152 train_time:173089ms step_avg:141.88ms step:1231/3200 train_loss:3.6672 train_time:173228ms step_avg:141.87ms step:1232/3200 train_loss:3.8203 train_time:173368ms step_avg:141.87ms step:1233/3200 train_loss:3.7258 train_time:173508ms step_avg:141.87ms step:1234/3200 train_loss:3.6599 train_time:173649ms step_avg:141.87ms step:1235/3200 train_loss:3.8106 train_time:173789ms step_avg:141.87ms step:1236/3200 train_loss:3.5722 train_time:173928ms step_avg:141.87ms step:1237/3200 train_loss:3.5376 train_time:174068ms step_avg:141.86ms step:1238/3200 train_loss:3.4940 train_time:174208ms step_avg:141.86ms step:1239/3200 train_loss:3.5660 train_time:174348ms step_avg:141.86ms step:1240/3200 train_loss:3.5735 train_time:174489ms step_avg:141.86ms step:1241/3200 train_loss:3.6228 train_time:174627ms step_avg:141.86ms step:1242/3200 train_loss:3.6722 train_time:174767ms step_avg:141.86ms step:1243/3200 train_loss:3.5397 train_time:174907ms step_avg:141.85ms step:1244/3200 train_loss:3.6382 train_time:175047ms step_avg:141.85ms step:1245/3200 train_loss:3.6567 train_time:175187ms step_avg:141.85ms step:1246/3200 train_loss:3.6538 train_time:175326ms step_avg:141.85ms step:1247/3200 train_loss:3.4862 train_time:175468ms step_avg:141.85ms step:1248/3200 train_loss:3.6209 train_time:175608ms step_avg:141.85ms step:1249/3200 train_loss:3.6843 train_time:175748ms step_avg:141.85ms step:1250/3200 train_loss:3.6502 train_time:175887ms step_avg:141.84ms step:1250/3200 val_loss:3.6023 train_time:175943ms step_avg:141.89ms step:1251/3200 train_loss:3.5499 train_time:176040ms step_avg:141.85ms step:1252/3200 train_loss:3.7576 train_time:176186ms step_avg:141.86ms step:1253/3200 train_loss:3.6207 train_time:176328ms step_avg:141.86ms step:1254/3200 train_loss:3.5483 train_time:176467ms step_avg:141.85ms step:1255/3200 train_loss:3.6879 train_time:176606ms step_avg:141.85ms step:1256/3200 train_loss:3.7531 train_time:176745ms step_avg:141.85ms step:1257/3200 train_loss:3.5573 train_time:176885ms step_avg:141.85ms step:1258/3200 train_loss:3.5895 train_time:177028ms step_avg:141.85ms step:1259/3200 train_loss:3.6138 train_time:177173ms step_avg:141.85ms step:1260/3200 train_loss:3.5906 train_time:177314ms step_avg:141.85ms step:1261/3200 train_loss:3.4429 train_time:177453ms step_avg:141.85ms step:1262/3200 train_loss:3.5500 train_time:177592ms step_avg:141.85ms step:1263/3200 train_loss:3.6150 train_time:177732ms step_avg:141.85ms step:1264/3200 train_loss:3.4666 train_time:177875ms step_avg:141.85ms step:1265/3200 train_loss:3.6842 train_time:178011ms step_avg:141.84ms step:1266/3200 train_loss:3.6641 train_time:178155ms step_avg:141.84ms step:1267/3200 train_loss:3.6702 train_time:178296ms step_avg:141.84ms step:1268/3200 train_loss:3.6130 train_time:178437ms step_avg:141.84ms step:1269/3200 train_loss:3.6504 train_time:178577ms step_avg:141.84ms step:1270/3200 train_loss:3.5125 train_time:178716ms step_avg:141.84ms step:1271/3200 train_loss:3.3583 train_time:178856ms step_avg:141.84ms step:1272/3200 train_loss:3.6336 train_time:178997ms step_avg:141.84ms step:1273/3200 train_loss:3.6015 train_time:179137ms step_avg:141.83ms step:1274/3200 train_loss:3.6503 train_time:179282ms step_avg:141.84ms step:1275/3200 train_loss:3.5969 train_time:179420ms step_avg:141.83ms step:1276/3200 train_loss:3.6935 train_time:179560ms step_avg:141.83ms step:1277/3200 train_loss:3.7116 train_time:179701ms step_avg:141.83ms step:1278/3200 train_loss:3.6685 train_time:179840ms step_avg:141.83ms step:1279/3200 train_loss:3.6654 train_time:179980ms step_avg:141.83ms step:1280/3200 train_loss:3.4937 train_time:180121ms step_avg:141.83ms step:1281/3200 train_loss:3.6131 train_time:180261ms step_avg:141.83ms step:1282/3200 train_loss:3.6729 train_time:180402ms step_avg:141.83ms step:1283/3200 train_loss:3.7128 train_time:180542ms step_avg:141.82ms step:1284/3200 train_loss:3.5952 train_time:180683ms step_avg:141.82ms step:1285/3200 train_loss:3.6219 train_time:180823ms step_avg:141.82ms step:1286/3200 train_loss:3.6070 train_time:180963ms step_avg:141.82ms step:1287/3200 train_loss:3.5835 train_time:181104ms step_avg:141.82ms step:1288/3200 train_loss:3.7210 train_time:181246ms step_avg:141.82ms step:1289/3200 train_loss:3.5547 train_time:181386ms step_avg:141.82ms step:1290/3200 train_loss:3.6372 train_time:181527ms step_avg:141.82ms step:1291/3200 train_loss:3.7047 train_time:181669ms step_avg:141.82ms step:1292/3200 train_loss:3.6337 train_time:181808ms step_avg:141.82ms step:1293/3200 train_loss:3.7345 train_time:181950ms step_avg:141.82ms step:1294/3200 train_loss:3.7582 train_time:182091ms step_avg:141.82ms step:1295/3200 train_loss:3.7164 train_time:182232ms step_avg:141.81ms step:1296/3200 train_loss:3.5345 train_time:182373ms step_avg:141.81ms step:1297/3200 train_loss:3.6054 train_time:182513ms step_avg:141.81ms step:1298/3200 train_loss:3.5100 train_time:182654ms step_avg:141.81ms step:1299/3200 train_loss:3.5848 train_time:182793ms step_avg:141.81ms step:1300/3200 train_loss:3.6495 train_time:182932ms step_avg:141.81ms step:1301/3200 train_loss:3.6529 train_time:183073ms step_avg:141.81ms step:1302/3200 train_loss:3.6559 train_time:183213ms step_avg:141.81ms step:1303/3200 train_loss:3.8142 train_time:183353ms step_avg:141.80ms step:1304/3200 train_loss:3.5838 train_time:183493ms step_avg:141.80ms step:1305/3200 train_loss:3.7932 train_time:183633ms step_avg:141.80ms step:1306/3200 train_loss:3.5150 train_time:183774ms step_avg:141.80ms step:1307/3200 train_loss:3.7113 train_time:183913ms step_avg:141.80ms step:1308/3200 train_loss:3.7099 train_time:184052ms step_avg:141.80ms step:1309/3200 train_loss:3.5680 train_time:184193ms step_avg:141.80ms step:1310/3200 train_loss:3.5490 train_time:184333ms step_avg:141.79ms step:1311/3200 train_loss:3.5529 train_time:184473ms step_avg:141.79ms step:1312/3200 train_loss:3.5391 train_time:184612ms step_avg:141.79ms step:1313/3200 train_loss:3.6553 train_time:184752ms step_avg:141.79ms step:1314/3200 train_loss:3.5990 train_time:184893ms step_avg:141.79ms step:1315/3200 train_loss:3.3261 train_time:185033ms step_avg:141.79ms step:1316/3200 train_loss:3.5528 train_time:185173ms step_avg:141.79ms step:1317/3200 train_loss:3.6288 train_time:185314ms step_avg:141.79ms step:1318/3200 train_loss:3.6580 train_time:185454ms step_avg:141.78ms step:1319/3200 train_loss:3.5330 train_time:185595ms step_avg:141.78ms step:1320/3200 train_loss:3.6690 train_time:185735ms step_avg:141.78ms step:1321/3200 train_loss:3.7268 train_time:185874ms step_avg:141.78ms step:1322/3200 train_loss:3.6167 train_time:186014ms step_avg:141.78ms step:1323/3200 train_loss:3.5563 train_time:186322ms step_avg:141.91ms step:1324/3200 train_loss:3.5940 train_time:186453ms step_avg:141.90ms step:1325/3200 train_loss:3.6893 train_time:186591ms step_avg:141.89ms step:1326/3200 train_loss:3.7362 train_time:186730ms step_avg:141.89ms step:1327/3200 train_loss:3.5000 train_time:186869ms step_avg:141.89ms step:1328/3200 train_loss:3.4249 train_time:187009ms step_avg:141.89ms step:1329/3200 train_loss:3.7213 train_time:187147ms step_avg:141.89ms step:1330/3200 train_loss:3.5678 train_time:187451ms step_avg:142.01ms step:1331/3200 train_loss:3.7053 train_time:187587ms step_avg:142.00ms step:1332/3200 train_loss:3.6088 train_time:187728ms step_avg:142.00ms step:1333/3200 train_loss:4.0041 train_time:187866ms step_avg:142.00ms step:1334/3200 train_loss:3.7044 train_time:188005ms step_avg:142.00ms step:1335/3200 train_loss:3.6125 train_time:188147ms step_avg:142.00ms step:1336/3200 train_loss:3.5668 train_time:188284ms step_avg:141.99ms step:1337/3200 train_loss:3.5524 train_time:188430ms step_avg:142.00ms step:1338/3200 train_loss:3.8137 train_time:188572ms step_avg:142.00ms step:1339/3200 train_loss:3.7518 train_time:188711ms step_avg:141.99ms step:1340/3200 train_loss:3.5937 train_time:188850ms step_avg:141.99ms step:1341/3200 train_loss:3.5478 train_time:188990ms step_avg:141.99ms step:1342/3200 train_loss:3.8564 train_time:189129ms step_avg:141.99ms step:1343/3200 train_loss:3.6266 train_time:189270ms step_avg:141.99ms step:1344/3200 train_loss:3.6232 train_time:189414ms step_avg:141.99ms step:1345/3200 train_loss:3.6754 train_time:189555ms step_avg:141.99ms step:1346/3200 train_loss:3.6524 train_time:189695ms step_avg:141.99ms step:1347/3200 train_loss:3.5438 train_time:189834ms step_avg:141.99ms step:1348/3200 train_loss:3.4985 train_time:189973ms step_avg:141.98ms step:1349/3200 train_loss:3.5912 train_time:190112ms step_avg:141.98ms step:1350/3200 train_loss:3.5190 train_time:190252ms step_avg:141.98ms step:1351/3200 train_loss:3.6573 train_time:190395ms step_avg:141.98ms step:1352/3200 train_loss:3.5008 train_time:190536ms step_avg:141.98ms step:1353/3200 train_loss:3.5646 train_time:190675ms step_avg:141.98ms step:1354/3200 train_loss:3.6679 train_time:190815ms step_avg:141.98ms step:1355/3200 train_loss:3.5165 train_time:190955ms step_avg:141.97ms step:1356/3200 train_loss:3.4407 train_time:191096ms step_avg:141.97ms step:1357/3200 train_loss:3.7781 train_time:191236ms step_avg:141.97ms step:1358/3200 train_loss:3.7134 train_time:191375ms step_avg:141.97ms step:1359/3200 train_loss:3.4357 train_time:191517ms step_avg:141.97ms step:1360/3200 train_loss:3.7117 train_time:191655ms step_avg:141.97ms step:1361/3200 train_loss:3.5943 train_time:191796ms step_avg:141.97ms step:1362/3200 train_loss:3.4653 train_time:191933ms step_avg:141.96ms step:1363/3200 train_loss:3.6401 train_time:192073ms step_avg:141.96ms step:1364/3200 train_loss:3.5291 train_time:192212ms step_avg:141.96ms step:1365/3200 train_loss:3.5543 train_time:192353ms step_avg:141.96ms step:1366/3200 train_loss:3.5778 train_time:192494ms step_avg:141.96ms step:1367/3200 train_loss:3.6783 train_time:192633ms step_avg:141.96ms step:1368/3200 train_loss:3.6620 train_time:192773ms step_avg:141.95ms step:1369/3200 train_loss:3.6184 train_time:192912ms step_avg:141.95ms step:1370/3200 train_loss:3.5223 train_time:193053ms step_avg:141.95ms step:1371/3200 train_loss:3.8467 train_time:193193ms step_avg:141.95ms step:1372/3200 train_loss:3.5905 train_time:193333ms step_avg:141.95ms step:1373/3200 train_loss:3.6288 train_time:193474ms step_avg:141.95ms step:1374/3200 train_loss:3.6220 train_time:193613ms step_avg:141.95ms step:1375/3200 train_loss:3.4210 train_time:193754ms step_avg:141.94ms step:1375/3200 val_loss:3.5810 train_time:193809ms step_avg:141.98ms step:1376/3200 train_loss:3.8233 train_time:193908ms step_avg:141.95ms step:1377/3200 train_loss:3.6009 train_time:194050ms step_avg:141.95ms step:1378/3200 train_loss:3.7471 train_time:194191ms step_avg:141.95ms step:1379/3200 train_loss:3.7955 train_time:194331ms step_avg:141.95ms step:1380/3200 train_loss:3.4301 train_time:194470ms step_avg:141.95ms step:1381/3200 train_loss:3.5810 train_time:194609ms step_avg:141.95ms step:1382/3200 train_loss:4.0515 train_time:194750ms step_avg:141.95ms step:1383/3200 train_loss:3.5031 train_time:194892ms step_avg:141.95ms step:1384/3200 train_loss:3.6566 train_time:195037ms step_avg:141.95ms step:1385/3200 train_loss:3.7308 train_time:195178ms step_avg:141.95ms step:1386/3200 train_loss:3.6450 train_time:195317ms step_avg:141.95ms step:1387/3200 train_loss:3.6456 train_time:195457ms step_avg:141.94ms step:1388/3200 train_loss:3.4637 train_time:195595ms step_avg:141.94ms step:1389/3200 train_loss:3.6069 train_time:195735ms step_avg:141.94ms step:1390/3200 train_loss:3.5793 train_time:195876ms step_avg:141.94ms step:1391/3200 train_loss:3.8457 train_time:196020ms step_avg:141.94ms step:1392/3200 train_loss:3.5563 train_time:196160ms step_avg:141.94ms step:1393/3200 train_loss:3.5502 train_time:196300ms step_avg:141.94ms step:1394/3200 train_loss:3.5142 train_time:196439ms step_avg:141.94ms step:1395/3200 train_loss:3.7993 train_time:196578ms step_avg:141.93ms step:1396/3200 train_loss:3.6959 train_time:196718ms step_avg:141.93ms step:1397/3200 train_loss:3.7005 train_time:196858ms step_avg:141.93ms step:1398/3200 train_loss:3.5616 train_time:196999ms step_avg:141.93ms step:1399/3200 train_loss:3.5457 train_time:197140ms step_avg:141.93ms step:1400/3200 train_loss:3.5993 train_time:197280ms step_avg:141.93ms step:1401/3200 train_loss:3.5779 train_time:197419ms step_avg:141.93ms step:1402/3200 train_loss:3.5948 train_time:197558ms step_avg:141.92ms step:1403/3200 train_loss:3.5594 train_time:197696ms step_avg:141.92ms step:1404/3200 train_loss:3.7919 train_time:197836ms step_avg:141.92ms step:1405/3200 train_loss:3.5387 train_time:197977ms step_avg:141.92ms step:1406/3200 train_loss:3.5804 train_time:198119ms step_avg:141.92ms step:1407/3200 train_loss:3.5828 train_time:198260ms step_avg:141.92ms step:1408/3200 train_loss:3.4504 train_time:198400ms step_avg:141.92ms step:1409/3200 train_loss:3.5657 train_time:198539ms step_avg:141.91ms step:1410/3200 train_loss:3.5454 train_time:198678ms step_avg:141.91ms step:1411/3200 train_loss:3.5445 train_time:198818ms step_avg:141.91ms step:1412/3200 train_loss:3.6331 train_time:198959ms step_avg:141.91ms step:1413/3200 train_loss:3.5729 train_time:199098ms step_avg:141.91ms step:1414/3200 train_loss:3.6167 train_time:199238ms step_avg:141.91ms step:1415/3200 train_loss:3.6092 train_time:199380ms step_avg:141.91ms step:1416/3200 train_loss:3.6879 train_time:199518ms step_avg:141.90ms step:1417/3200 train_loss:3.4912 train_time:199658ms step_avg:141.90ms step:1418/3200 train_loss:3.5565 train_time:199797ms step_avg:141.90ms step:1419/3200 train_loss:3.6502 train_time:199937ms step_avg:141.90ms step:1420/3200 train_loss:3.6759 train_time:200079ms step_avg:141.90ms step:1421/3200 train_loss:3.6580 train_time:200218ms step_avg:141.90ms step:1422/3200 train_loss:3.6388 train_time:200358ms step_avg:141.90ms step:1423/3200 train_loss:3.6203 train_time:200498ms step_avg:141.90ms step:1424/3200 train_loss:3.6098 train_time:200638ms step_avg:141.89ms step:1425/3200 train_loss:3.6057 train_time:200777ms step_avg:141.89ms step:1426/3200 train_loss:3.4814 train_time:200917ms step_avg:141.89ms step:1427/3200 train_loss:3.5897 train_time:201058ms step_avg:141.89ms step:1428/3200 train_loss:3.5312 train_time:201198ms step_avg:141.89ms step:1429/3200 train_loss:3.6482 train_time:201337ms step_avg:141.89ms step:1430/3200 train_loss:3.6078 train_time:201477ms step_avg:141.89ms step:1431/3200 train_loss:3.5421 train_time:201617ms step_avg:141.88ms step:1432/3200 train_loss:3.5887 train_time:201757ms step_avg:141.88ms step:1433/3200 train_loss:3.6257 train_time:201896ms step_avg:141.88ms step:1434/3200 train_loss:3.4946 train_time:202036ms step_avg:141.88ms step:1435/3200 train_loss:3.5996 train_time:202176ms step_avg:141.88ms step:1436/3200 train_loss:3.4224 train_time:202317ms step_avg:141.88ms step:1437/3200 train_loss:3.4862 train_time:202458ms step_avg:141.88ms step:1438/3200 train_loss:3.6786 train_time:202597ms step_avg:141.87ms step:1439/3200 train_loss:3.6423 train_time:202738ms step_avg:141.87ms step:1440/3200 train_loss:3.5851 train_time:202877ms step_avg:141.87ms step:1441/3200 train_loss:3.4467 train_time:203018ms step_avg:141.87ms step:1442/3200 train_loss:3.6133 train_time:203160ms step_avg:141.87ms step:1443/3200 train_loss:3.6703 train_time:203297ms step_avg:141.87ms step:1444/3200 train_loss:3.7486 train_time:203437ms step_avg:141.87ms step:1445/3200 train_loss:3.7155 train_time:203578ms step_avg:141.87ms step:1446/3200 train_loss:3.6031 train_time:203718ms step_avg:141.86ms step:1447/3200 train_loss:3.4704 train_time:203857ms step_avg:141.86ms step:1448/3200 train_loss:3.5431 train_time:203996ms step_avg:141.86ms step:1449/3200 train_loss:3.5672 train_time:204137ms step_avg:141.86ms step:1450/3200 train_loss:3.6905 train_time:204277ms step_avg:141.86ms step:1451/3200 train_loss:3.6710 train_time:204417ms step_avg:141.86ms step:1452/3200 train_loss:3.4865 train_time:204558ms step_avg:141.86ms step:1453/3200 train_loss:3.6111 train_time:204698ms step_avg:141.86ms step:1454/3200 train_loss:3.5216 train_time:204838ms step_avg:141.85ms step:1455/3200 train_loss:3.5566 train_time:204978ms step_avg:141.85ms step:1456/3200 train_loss:3.6049 train_time:205118ms step_avg:141.85ms step:1457/3200 train_loss:3.5302 train_time:205258ms step_avg:141.85ms step:1458/3200 train_loss:3.4327 train_time:205398ms step_avg:141.85ms step:1459/3200 train_loss:3.6730 train_time:205538ms step_avg:141.85ms step:1460/3200 train_loss:3.5431 train_time:205679ms step_avg:141.85ms step:1461/3200 train_loss:3.5914 train_time:205818ms step_avg:141.85ms step:1462/3200 train_loss:3.7234 train_time:205958ms step_avg:141.84ms step:1463/3200 train_loss:3.5379 train_time:206098ms step_avg:141.84ms step:1464/3200 train_loss:3.7304 train_time:206239ms step_avg:141.84ms step:1465/3200 train_loss:3.6207 train_time:206378ms step_avg:141.84ms step:1466/3200 train_loss:3.6318 train_time:206518ms step_avg:141.84ms step:1467/3200 train_loss:3.5427 train_time:206659ms step_avg:141.84ms step:1468/3200 train_loss:3.7045 train_time:206799ms step_avg:141.84ms step:1469/3200 train_loss:3.5665 train_time:206938ms step_avg:141.84ms step:1470/3200 train_loss:3.5419 train_time:207078ms step_avg:141.83ms step:1471/3200 train_loss:3.5883 train_time:207218ms step_avg:141.83ms step:1472/3200 train_loss:3.5135 train_time:207359ms step_avg:141.83ms step:1473/3200 train_loss:3.6087 train_time:207498ms step_avg:141.83ms step:1474/3200 train_loss:3.7012 train_time:207638ms step_avg:141.83ms step:1475/3200 train_loss:3.5784 train_time:207778ms step_avg:141.83ms step:1476/3200 train_loss:3.4010 train_time:207918ms step_avg:141.83ms step:1477/3200 train_loss:3.5273 train_time:208057ms step_avg:141.82ms step:1478/3200 train_loss:3.4993 train_time:208197ms step_avg:141.82ms step:1479/3200 train_loss:3.5849 train_time:208337ms step_avg:141.82ms step:1480/3200 train_loss:3.6707 train_time:208477ms step_avg:141.82ms step:1481/3200 train_loss:3.5386 train_time:208618ms step_avg:141.82ms step:1482/3200 train_loss:3.7087 train_time:208758ms step_avg:141.82ms step:1483/3200 train_loss:3.6392 train_time:208897ms step_avg:141.82ms step:1484/3200 train_loss:3.5435 train_time:209038ms step_avg:141.82ms step:1485/3200 train_loss:3.5401 train_time:209179ms step_avg:141.82ms step:1486/3200 train_loss:3.5262 train_time:209319ms step_avg:141.81ms step:1487/3200 train_loss:3.5086 train_time:209459ms step_avg:141.81ms step:1488/3200 train_loss:3.5971 train_time:209598ms step_avg:141.81ms step:1489/3200 train_loss:3.5038 train_time:209738ms step_avg:141.81ms step:1490/3200 train_loss:3.5960 train_time:209878ms step_avg:141.81ms step:1491/3200 train_loss:3.5345 train_time:210018ms step_avg:141.81ms step:1492/3200 train_loss:3.4509 train_time:210159ms step_avg:141.81ms step:1493/3200 train_loss:3.5298 train_time:210297ms step_avg:141.80ms step:1494/3200 train_loss:3.7029 train_time:210437ms step_avg:141.80ms step:1495/3200 train_loss:3.5623 train_time:210577ms step_avg:141.80ms step:1496/3200 train_loss:3.3156 train_time:210717ms step_avg:141.80ms step:1497/3200 train_loss:3.6162 train_time:210859ms step_avg:141.80ms step:1498/3200 train_loss:3.5768 train_time:210999ms step_avg:141.80ms step:1499/3200 train_loss:3.6272 train_time:211138ms step_avg:141.80ms step:1500/3200 train_loss:3.5792 train_time:211278ms step_avg:141.80ms step:1500/3200 val_loss:3.5608 train_time:211333ms step_avg:141.83ms step:1501/3200 train_loss:3.5642 train_time:211429ms step_avg:141.80ms step:1502/3200 train_loss:3.3596 train_time:211574ms step_avg:141.81ms step:1503/3200 train_loss:3.6346 train_time:211715ms step_avg:141.81ms step:1504/3200 train_loss:3.5058 train_time:211854ms step_avg:141.80ms step:1505/3200 train_loss:3.5202 train_time:211993ms step_avg:141.80ms step:1506/3200 train_loss:3.4755 train_time:212134ms step_avg:141.80ms step:1507/3200 train_loss:3.5619 train_time:212270ms step_avg:141.80ms step:1508/3200 train_loss:3.4741 train_time:212412ms step_avg:141.80ms step:1509/3200 train_loss:3.7896 train_time:212558ms step_avg:141.80ms step:1510/3200 train_loss:3.5329 train_time:212699ms step_avg:141.80ms step:1511/3200 train_loss:3.5378 train_time:212839ms step_avg:141.80ms step:1512/3200 train_loss:3.6589 train_time:213137ms step_avg:141.90ms step:1513/3200 train_loss:3.6945 train_time:213275ms step_avg:141.90ms step:1514/3200 train_loss:3.5560 train_time:213414ms step_avg:141.90ms step:1515/3200 train_loss:3.3898 train_time:213553ms step_avg:141.90ms step:1516/3200 train_loss:3.5117 train_time:213692ms step_avg:141.89ms step:1517/3200 train_loss:3.5153 train_time:213831ms step_avg:141.89ms step:1518/3200 train_loss:3.5963 train_time:213971ms step_avg:141.89ms step:1519/3200 train_loss:3.4847 train_time:214116ms step_avg:141.89ms step:1520/3200 train_loss:3.7716 train_time:214419ms step_avg:142.00ms step:1521/3200 train_loss:3.4418 train_time:214554ms step_avg:142.00ms step:1522/3200 train_loss:3.5002 train_time:214693ms step_avg:141.99ms step:1523/3200 train_loss:3.6472 train_time:214831ms step_avg:141.99ms step:1524/3200 train_loss:3.5036 train_time:214971ms step_avg:141.99ms step:1525/3200 train_loss:3.5967 train_time:215111ms step_avg:141.99ms step:1526/3200 train_loss:3.5901 train_time:215252ms step_avg:141.99ms step:1527/3200 train_loss:3.5569 train_time:215394ms step_avg:141.99ms step:1528/3200 train_loss:3.5521 train_time:215537ms step_avg:141.99ms step:1529/3200 train_loss:3.7042 train_time:215677ms step_avg:141.99ms step:1530/3200 train_loss:3.6778 train_time:215816ms step_avg:141.98ms step:1531/3200 train_loss:3.5104 train_time:215957ms step_avg:141.98ms step:1532/3200 train_loss:3.4755 train_time:216095ms step_avg:141.98ms step:1533/3200 train_loss:3.6249 train_time:216235ms step_avg:141.98ms step:1534/3200 train_loss:3.5768 train_time:216378ms step_avg:141.98ms step:1535/3200 train_loss:3.5593 train_time:216519ms step_avg:141.98ms step:1536/3200 train_loss:3.5563 train_time:216659ms step_avg:141.98ms step:1537/3200 train_loss:3.4940 train_time:216798ms step_avg:141.98ms step:1538/3200 train_loss:3.5569 train_time:216938ms step_avg:141.98ms step:1539/3200 train_loss:3.7285 train_time:217076ms step_avg:141.97ms step:1540/3200 train_loss:3.6596 train_time:217217ms step_avg:141.97ms step:1541/3200 train_loss:3.5722 train_time:217358ms step_avg:141.97ms step:1542/3200 train_loss:3.5188 train_time:217499ms step_avg:141.97ms step:1543/3200 train_loss:3.5263 train_time:217639ms step_avg:141.97ms step:1544/3200 train_loss:3.4855 train_time:217781ms step_avg:141.97ms step:1545/3200 train_loss:3.5736 train_time:217918ms step_avg:141.97ms step:1546/3200 train_loss:3.5428 train_time:218057ms step_avg:141.96ms step:1547/3200 train_loss:3.5213 train_time:218198ms step_avg:141.96ms step:1548/3200 train_loss:3.4851 train_time:218338ms step_avg:141.96ms step:1549/3200 train_loss:3.5129 train_time:218479ms step_avg:141.96ms step:1550/3200 train_loss:3.6380 train_time:218619ms step_avg:141.96ms step:1551/3200 train_loss:3.5543 train_time:218759ms step_avg:141.96ms step:1552/3200 train_loss:3.4980 train_time:218898ms step_avg:141.96ms step:1553/3200 train_loss:3.4947 train_time:219038ms step_avg:141.96ms step:1554/3200 train_loss:3.4844 train_time:219176ms step_avg:141.95ms step:1555/3200 train_loss:3.6067 train_time:219318ms step_avg:141.95ms step:1556/3200 train_loss:3.6171 train_time:219459ms step_avg:141.95ms step:1557/3200 train_loss:3.5447 train_time:219599ms step_avg:141.95ms step:1558/3200 train_loss:3.6056 train_time:219739ms step_avg:141.95ms step:1559/3200 train_loss:3.5292 train_time:219879ms step_avg:141.95ms step:1560/3200 train_loss:3.4405 train_time:220019ms step_avg:141.95ms step:1561/3200 train_loss:3.6831 train_time:220158ms step_avg:141.95ms step:1562/3200 train_loss:3.5022 train_time:220298ms step_avg:141.94ms step:1563/3200 train_loss:3.4858 train_time:220439ms step_avg:141.94ms step:1564/3200 train_loss:3.6101 train_time:220579ms step_avg:141.94ms step:1565/3200 train_loss:3.4367 train_time:220720ms step_avg:141.94ms step:1566/3200 train_loss:3.4934 train_time:220861ms step_avg:141.94ms step:1567/3200 train_loss:3.6388 train_time:221001ms step_avg:141.94ms step:1568/3200 train_loss:3.5170 train_time:221141ms step_avg:141.94ms step:1569/3200 train_loss:3.5036 train_time:221279ms step_avg:141.94ms step:1570/3200 train_loss:3.5995 train_time:221420ms step_avg:141.94ms step:1571/3200 train_loss:3.6120 train_time:221559ms step_avg:141.93ms step:1572/3200 train_loss:3.4413 train_time:221699ms step_avg:141.93ms step:1573/3200 train_loss:3.4667 train_time:221841ms step_avg:141.93ms step:1574/3200 train_loss:3.5847 train_time:221980ms step_avg:141.93ms step:1575/3200 train_loss:3.4524 train_time:222120ms step_avg:141.93ms step:1576/3200 train_loss:3.6036 train_time:222260ms step_avg:141.93ms step:1577/3200 train_loss:3.5085 train_time:222401ms step_avg:141.93ms step:1578/3200 train_loss:3.5584 train_time:222540ms step_avg:141.93ms step:1579/3200 train_loss:3.5304 train_time:222679ms step_avg:141.92ms step:1580/3200 train_loss:3.5028 train_time:222821ms step_avg:141.92ms step:1581/3200 train_loss:3.4770 train_time:222961ms step_avg:141.92ms step:1582/3200 train_loss:3.7145 train_time:223101ms step_avg:141.92ms step:1583/3200 train_loss:3.4978 train_time:223241ms step_avg:141.92ms step:1584/3200 train_loss:3.6425 train_time:223380ms step_avg:141.92ms step:1585/3200 train_loss:3.4785 train_time:223521ms step_avg:141.92ms step:1586/3200 train_loss:3.6340 train_time:223661ms step_avg:141.92ms step:1587/3200 train_loss:3.4227 train_time:223801ms step_avg:141.92ms step:1588/3200 train_loss:3.6145 train_time:223942ms step_avg:141.92ms step:1589/3200 train_loss:3.5302 train_time:224082ms step_avg:141.91ms step:1590/3200 train_loss:3.6863 train_time:224221ms step_avg:141.91ms step:1591/3200 train_loss:3.4989 train_time:224361ms step_avg:141.91ms step:1592/3200 train_loss:3.5176 train_time:224501ms step_avg:141.91ms step:1593/3200 train_loss:3.5914 train_time:224641ms step_avg:141.91ms step:1594/3200 train_loss:3.5612 train_time:224781ms step_avg:141.91ms step:1595/3200 train_loss:3.5380 train_time:224921ms step_avg:141.91ms step:1596/3200 train_loss:3.6789 train_time:225061ms step_avg:141.91ms step:1597/3200 train_loss:3.4114 train_time:225201ms step_avg:141.90ms step:1598/3200 train_loss:3.5740 train_time:225341ms step_avg:141.90ms step:1599/3200 train_loss:3.6101 train_time:225480ms step_avg:141.90ms step:1600/3200 train_loss:3.6613 train_time:225621ms step_avg:141.90ms step:1601/3200 train_loss:3.5158 train_time:225761ms step_avg:141.90ms step:1602/3200 train_loss:3.8092 train_time:225900ms step_avg:141.90ms step:1603/3200 train_loss:3.6957 train_time:226041ms step_avg:141.90ms step:1604/3200 train_loss:3.4784 train_time:226181ms step_avg:141.90ms step:1605/3200 train_loss:3.5164 train_time:226321ms step_avg:141.89ms step:1606/3200 train_loss:3.3998 train_time:226461ms step_avg:141.89ms step:1607/3200 train_loss:3.7225 train_time:226600ms step_avg:141.89ms step:1608/3200 train_loss:3.5274 train_time:226740ms step_avg:141.89ms step:1609/3200 train_loss:3.5452 train_time:226880ms step_avg:141.89ms step:1610/3200 train_loss:3.4974 train_time:227020ms step_avg:141.89ms step:1611/3200 train_loss:4.1020 train_time:227160ms step_avg:141.89ms step:1612/3200 train_loss:3.7315 train_time:227301ms step_avg:141.89ms step:1613/3200 train_loss:3.6429 train_time:227441ms step_avg:141.88ms step:1614/3200 train_loss:3.5072 train_time:227582ms step_avg:141.88ms step:1615/3200 train_loss:3.5554 train_time:227721ms step_avg:141.88ms step:1616/3200 train_loss:3.5431 train_time:227862ms step_avg:141.88ms step:1617/3200 train_loss:3.5082 train_time:228002ms step_avg:141.88ms step:1618/3200 train_loss:3.5785 train_time:228143ms step_avg:141.88ms step:1619/3200 train_loss:3.5392 train_time:228283ms step_avg:141.88ms step:1620/3200 train_loss:3.4319 train_time:228423ms step_avg:141.88ms step:1621/3200 train_loss:3.6962 train_time:228563ms step_avg:141.88ms step:1622/3200 train_loss:3.6066 train_time:228702ms step_avg:141.87ms step:1623/3200 train_loss:3.3986 train_time:228842ms step_avg:141.87ms step:1624/3200 train_loss:3.5160 train_time:228983ms step_avg:141.87ms step:1625/3200 train_loss:3.4771 train_time:229122ms step_avg:141.87ms step:1625/3200 val_loss:3.5441 train_time:229177ms step_avg:141.91ms step:1626/3200 train_loss:3.5527 train_time:229275ms step_avg:141.88ms step:1627/3200 train_loss:3.5184 train_time:229421ms step_avg:141.88ms step:1628/3200 train_loss:3.4759 train_time:229560ms step_avg:141.88ms step:1629/3200 train_loss:3.5858 train_time:229698ms step_avg:141.88ms step:1630/3200 train_loss:3.4813 train_time:229836ms step_avg:141.87ms step:1631/3200 train_loss:3.5400 train_time:229976ms step_avg:141.87ms step:1632/3200 train_loss:3.4182 train_time:230115ms step_avg:141.87ms step:1633/3200 train_loss:3.3894 train_time:230265ms step_avg:141.88ms step:1634/3200 train_loss:3.5546 train_time:230407ms step_avg:141.88ms step:1635/3200 train_loss:3.5356 train_time:230548ms step_avg:141.88ms step:1636/3200 train_loss:3.4763 train_time:230687ms step_avg:141.87ms step:1637/3200 train_loss:3.5716 train_time:230828ms step_avg:141.87ms step:1638/3200 train_loss:3.6202 train_time:230970ms step_avg:141.87ms step:1639/3200 train_loss:3.6480 train_time:231107ms step_avg:141.87ms step:1640/3200 train_loss:3.8115 train_time:231247ms step_avg:141.87ms step:1641/3200 train_loss:3.6309 train_time:231388ms step_avg:141.87ms step:1642/3200 train_loss:3.5511 train_time:231530ms step_avg:141.87ms step:1643/3200 train_loss:3.6264 train_time:231670ms step_avg:141.87ms step:1644/3200 train_loss:3.5349 train_time:231810ms step_avg:141.87ms step:1645/3200 train_loss:3.5474 train_time:231951ms step_avg:141.87ms step:1646/3200 train_loss:3.5509 train_time:232090ms step_avg:141.86ms step:1647/3200 train_loss:3.3190 train_time:232230ms step_avg:141.86ms step:1648/3200 train_loss:3.5854 train_time:232371ms step_avg:141.86ms step:1649/3200 train_loss:3.4480 train_time:232513ms step_avg:141.86ms step:1650/3200 train_loss:3.5302 train_time:232656ms step_avg:141.86ms step:1651/3200 train_loss:3.5008 train_time:232795ms step_avg:141.86ms step:1652/3200 train_loss:3.5706 train_time:232935ms step_avg:141.86ms step:1653/3200 train_loss:3.5009 train_time:233077ms step_avg:141.86ms step:1654/3200 train_loss:3.6319 train_time:233216ms step_avg:141.86ms step:1655/3200 train_loss:3.6213 train_time:233357ms step_avg:141.86ms step:1656/3200 train_loss:3.4377 train_time:233498ms step_avg:141.86ms step:1657/3200 train_loss:3.5960 train_time:233639ms step_avg:141.86ms step:1658/3200 train_loss:3.4888 train_time:233780ms step_avg:141.86ms step:1659/3200 train_loss:3.4714 train_time:233920ms step_avg:141.86ms step:1660/3200 train_loss:3.5539 train_time:234060ms step_avg:141.85ms step:1661/3200 train_loss:3.5757 train_time:234200ms step_avg:141.85ms step:1662/3200 train_loss:3.4910 train_time:234341ms step_avg:141.85ms step:1663/3200 train_loss:3.5845 train_time:234482ms step_avg:141.85ms step:1664/3200 train_loss:3.6009 train_time:234622ms step_avg:141.85ms step:1665/3200 train_loss:3.6228 train_time:234762ms step_avg:141.85ms step:1666/3200 train_loss:3.5996 train_time:234901ms step_avg:141.85ms step:1667/3200 train_loss:3.7380 train_time:235042ms step_avg:141.85ms step:1668/3200 train_loss:3.4459 train_time:235181ms step_avg:141.85ms step:1669/3200 train_loss:3.5249 train_time:235323ms step_avg:141.85ms step:1670/3200 train_loss:3.4562 train_time:235463ms step_avg:141.85ms step:1671/3200 train_loss:3.4613 train_time:235604ms step_avg:141.84ms step:1672/3200 train_loss:3.6155 train_time:235744ms step_avg:141.84ms step:1673/3200 train_loss:3.7999 train_time:235884ms step_avg:141.84ms step:1674/3200 train_loss:3.5213 train_time:236024ms step_avg:141.84ms step:1675/3200 train_loss:3.4974 train_time:236166ms step_avg:141.84ms step:1676/3200 train_loss:3.3922 train_time:236309ms step_avg:141.84ms step:1677/3200 train_loss:3.5951 train_time:236443ms step_avg:141.84ms step:1678/3200 train_loss:3.5024 train_time:236583ms step_avg:141.84ms step:1679/3200 train_loss:3.5336 train_time:236723ms step_avg:141.84ms step:1680/3200 train_loss:3.5206 train_time:236863ms step_avg:141.83ms step:1681/3200 train_loss:3.3371 train_time:237002ms step_avg:141.83ms step:1682/3200 train_loss:3.5218 train_time:237142ms step_avg:141.83ms step:1683/3200 train_loss:3.5407 train_time:237282ms step_avg:141.83ms step:1684/3200 train_loss:3.5875 train_time:237423ms step_avg:141.83ms step:1685/3200 train_loss:3.5785 train_time:237563ms step_avg:141.83ms step:1686/3200 train_loss:3.4921 train_time:237703ms step_avg:141.83ms step:1687/3200 train_loss:3.5985 train_time:237843ms step_avg:141.83ms step:1688/3200 train_loss:3.4805 train_time:237983ms step_avg:141.83ms step:1689/3200 train_loss:3.5611 train_time:238123ms step_avg:141.82ms step:1690/3200 train_loss:3.4750 train_time:238264ms step_avg:141.82ms step:1691/3200 train_loss:3.3831 train_time:238402ms step_avg:141.82ms step:1692/3200 train_loss:3.5261 train_time:238542ms step_avg:141.82ms step:1693/3200 train_loss:3.5216 train_time:238683ms step_avg:141.82ms step:1694/3200 train_loss:3.4392 train_time:238824ms step_avg:141.82ms step:1695/3200 train_loss:3.8848 train_time:238963ms step_avg:141.82ms step:1696/3200 train_loss:3.6004 train_time:239104ms step_avg:141.82ms step:1697/3200 train_loss:3.5766 train_time:239243ms step_avg:141.82ms step:1698/3200 train_loss:3.4859 train_time:239386ms step_avg:141.82ms step:1699/3200 train_loss:3.3982 train_time:239523ms step_avg:141.81ms step:1700/3200 train_loss:3.4932 train_time:239663ms step_avg:141.81ms step:1701/3200 train_loss:3.4836 train_time:239963ms step_avg:141.91ms step:1702/3200 train_loss:3.5564 train_time:240099ms step_avg:141.90ms step:1703/3200 train_loss:3.4839 train_time:240238ms step_avg:141.90ms step:1704/3200 train_loss:3.6826 train_time:240377ms step_avg:141.90ms step:1705/3200 train_loss:3.4458 train_time:240515ms step_avg:141.90ms step:1706/3200 train_loss:3.6739 train_time:240654ms step_avg:141.90ms step:1707/3200 train_loss:3.5192 train_time:240794ms step_avg:141.89ms step:1708/3200 train_loss:3.2942 train_time:240939ms step_avg:141.90ms step:1709/3200 train_loss:3.6310 train_time:241082ms step_avg:141.90ms step:1710/3200 train_loss:3.5421 train_time:241384ms step_avg:141.99ms step:1711/3200 train_loss:3.5335 train_time:241520ms step_avg:141.99ms step:1712/3200 train_loss:3.5222 train_time:241658ms step_avg:141.99ms step:1713/3200 train_loss:3.5605 train_time:241797ms step_avg:141.98ms step:1714/3200 train_loss:3.5859 train_time:241936ms step_avg:141.98ms step:1715/3200 train_loss:3.5121 train_time:242075ms step_avg:141.98ms step:1716/3200 train_loss:3.5117 train_time:242215ms step_avg:141.98ms step:1717/3200 train_loss:3.3505 train_time:242362ms step_avg:141.98ms step:1718/3200 train_loss:3.4872 train_time:242503ms step_avg:141.98ms step:1719/3200 train_loss:3.5093 train_time:242644ms step_avg:141.98ms step:1720/3200 train_loss:3.4567 train_time:242783ms step_avg:141.98ms step:1721/3200 train_loss:3.6066 train_time:242922ms step_avg:141.98ms step:1722/3200 train_loss:3.4198 train_time:243060ms step_avg:141.97ms step:1723/3200 train_loss:3.5605 train_time:243201ms step_avg:141.97ms step:1724/3200 train_loss:3.6429 train_time:243343ms step_avg:141.97ms step:1725/3200 train_loss:3.5007 train_time:243485ms step_avg:141.97ms step:1726/3200 train_loss:3.7165 train_time:243625ms step_avg:141.97ms step:1727/3200 train_loss:3.5071 train_time:243765ms step_avg:141.97ms step:1728/3200 train_loss:3.5684 train_time:243903ms step_avg:141.97ms step:1729/3200 train_loss:3.5435 train_time:244043ms step_avg:141.97ms step:1730/3200 train_loss:3.5540 train_time:244183ms step_avg:141.97ms step:1731/3200 train_loss:3.9206 train_time:244325ms step_avg:141.97ms step:1732/3200 train_loss:3.5348 train_time:244466ms step_avg:141.97ms step:1733/3200 train_loss:3.6611 train_time:244606ms step_avg:141.97ms step:1734/3200 train_loss:3.4446 train_time:244746ms step_avg:141.96ms step:1735/3200 train_loss:3.4801 train_time:244885ms step_avg:141.96ms step:1736/3200 train_loss:3.5057 train_time:245025ms step_avg:141.96ms step:1737/3200 train_loss:3.4840 train_time:245166ms step_avg:141.96ms step:1738/3200 train_loss:3.6337 train_time:245306ms step_avg:141.96ms step:1739/3200 train_loss:3.4948 train_time:245446ms step_avg:141.96ms step:1740/3200 train_loss:3.5514 train_time:245587ms step_avg:141.96ms step:1741/3200 train_loss:3.6064 train_time:245727ms step_avg:141.96ms step:1742/3200 train_loss:3.4131 train_time:245869ms step_avg:141.96ms step:1743/3200 train_loss:3.3041 train_time:246008ms step_avg:141.96ms step:1744/3200 train_loss:3.2653 train_time:246148ms step_avg:141.95ms step:1745/3200 train_loss:3.5277 train_time:246287ms step_avg:141.95ms step:1746/3200 train_loss:3.5442 train_time:246428ms step_avg:141.95ms step:1747/3200 train_loss:3.5164 train_time:246569ms step_avg:141.95ms step:1748/3200 train_loss:3.5249 train_time:246710ms step_avg:141.95ms step:1749/3200 train_loss:3.7632 train_time:246851ms step_avg:141.95ms step:1750/3200 train_loss:3.4768 train_time:246991ms step_avg:141.95ms step:1750/3200 val_loss:3.5257 train_time:247047ms step_avg:141.98ms step:1751/3200 train_loss:3.5499 train_time:247142ms step_avg:141.95ms step:1752/3200 train_loss:3.5326 train_time:247289ms step_avg:141.96ms step:1753/3200 train_loss:3.1737 train_time:247428ms step_avg:141.96ms step:1754/3200 train_loss:3.2973 train_time:247566ms step_avg:141.95ms step:1755/3200 train_loss:3.3931 train_time:247705ms step_avg:141.95ms step:1756/3200 train_loss:3.3419 train_time:247843ms step_avg:141.95ms step:1757/3200 train_loss:3.5001 train_time:247982ms step_avg:141.95ms step:1758/3200 train_loss:3.3834 train_time:248124ms step_avg:141.95ms step:1759/3200 train_loss:3.3783 train_time:248269ms step_avg:141.95ms step:1760/3200 train_loss:4.4416 train_time:248409ms step_avg:141.95ms step:1761/3200 train_loss:3.5106 train_time:248550ms step_avg:141.95ms step:1762/3200 train_loss:3.5435 train_time:248689ms step_avg:141.95ms step:1763/3200 train_loss:3.5438 train_time:248828ms step_avg:141.94ms step:1764/3200 train_loss:3.5617 train_time:248967ms step_avg:141.94ms step:1765/3200 train_loss:3.4798 train_time:249108ms step_avg:141.94ms step:1766/3200 train_loss:3.5096 train_time:249250ms step_avg:141.94ms step:1767/3200 train_loss:3.5348 train_time:249391ms step_avg:141.94ms step:1768/3200 train_loss:3.7884 train_time:249533ms step_avg:141.94ms step:1769/3200 train_loss:3.5089 train_time:249671ms step_avg:141.94ms step:1770/3200 train_loss:3.5789 train_time:249810ms step_avg:141.94ms step:1771/3200 train_loss:3.9510 train_time:249950ms step_avg:141.94ms step:1772/3200 train_loss:3.5182 train_time:250090ms step_avg:141.94ms step:1773/3200 train_loss:3.4188 train_time:250230ms step_avg:141.93ms step:1774/3200 train_loss:3.6739 train_time:250370ms step_avg:141.93ms step:1775/3200 train_loss:3.4073 train_time:250511ms step_avg:141.93ms step:1776/3200 train_loss:3.5687 train_time:250650ms step_avg:141.93ms step:1777/3200 train_loss:3.6249 train_time:250790ms step_avg:141.93ms step:1778/3200 train_loss:3.7160 train_time:250930ms step_avg:141.93ms step:1779/3200 train_loss:3.5235 train_time:251070ms step_avg:141.93ms step:1780/3200 train_loss:3.8157 train_time:251211ms step_avg:141.93ms step:1781/3200 train_loss:3.5942 train_time:251351ms step_avg:141.93ms step:1782/3200 train_loss:3.6089 train_time:251491ms step_avg:141.93ms step:1783/3200 train_loss:3.3910 train_time:251631ms step_avg:141.92ms step:1784/3200 train_loss:3.4745 train_time:251771ms step_avg:141.92ms step:1785/3200 train_loss:3.6185 train_time:251912ms step_avg:141.92ms step:1786/3200 train_loss:3.5016 train_time:252052ms step_avg:141.92ms step:1787/3200 train_loss:3.6766 train_time:252192ms step_avg:141.92ms step:1788/3200 train_loss:3.4944 train_time:252332ms step_avg:141.92ms step:1789/3200 train_loss:3.4646 train_time:252472ms step_avg:141.92ms step:1790/3200 train_loss:3.6134 train_time:252614ms step_avg:141.92ms step:1791/3200 train_loss:3.5081 train_time:252753ms step_avg:141.92ms step:1792/3200 train_loss:3.4603 train_time:252895ms step_avg:141.92ms step:1793/3200 train_loss:3.5927 train_time:253034ms step_avg:141.91ms step:1794/3200 train_loss:3.4703 train_time:253173ms step_avg:141.91ms step:1795/3200 train_loss:3.4538 train_time:253314ms step_avg:141.91ms step:1796/3200 train_loss:3.5173 train_time:253453ms step_avg:141.91ms step:1797/3200 train_loss:3.4933 train_time:253594ms step_avg:141.91ms step:1798/3200 train_loss:3.6131 train_time:253734ms step_avg:141.91ms step:1799/3200 train_loss:3.4977 train_time:253873ms step_avg:141.91ms step:1800/3200 train_loss:3.5807 train_time:254014ms step_avg:141.91ms step:1801/3200 train_loss:3.5084 train_time:254153ms step_avg:141.91ms step:1802/3200 train_loss:3.5450 train_time:254293ms step_avg:141.90ms step:1803/3200 train_loss:3.4615 train_time:254433ms step_avg:141.90ms step:1804/3200 train_loss:3.3872 train_time:254572ms step_avg:141.90ms step:1805/3200 train_loss:3.6305 train_time:254712ms step_avg:141.90ms step:1806/3200 train_loss:3.5593 train_time:254851ms step_avg:141.90ms step:1807/3200 train_loss:3.5638 train_time:254991ms step_avg:141.90ms step:1808/3200 train_loss:3.6736 train_time:255131ms step_avg:141.90ms step:1809/3200 train_loss:3.4733 train_time:255271ms step_avg:141.90ms step:1810/3200 train_loss:3.5731 train_time:255411ms step_avg:141.89ms step:1811/3200 train_loss:3.7078 train_time:255552ms step_avg:141.89ms step:1812/3200 train_loss:3.5594 train_time:255691ms step_avg:141.89ms step:1813/3200 train_loss:3.6042 train_time:255832ms step_avg:141.89ms step:1814/3200 train_loss:3.6296 train_time:255971ms step_avg:141.89ms step:1815/3200 train_loss:3.5718 train_time:256111ms step_avg:141.89ms step:1816/3200 train_loss:3.6107 train_time:256251ms step_avg:141.89ms step:1817/3200 train_loss:3.5609 train_time:256393ms step_avg:141.89ms step:1818/3200 train_loss:3.6185 train_time:256532ms step_avg:141.89ms step:1819/3200 train_loss:3.5402 train_time:256673ms step_avg:141.89ms step:1820/3200 train_loss:3.5304 train_time:256814ms step_avg:141.89ms step:1821/3200 train_loss:3.4837 train_time:256958ms step_avg:141.89ms step:1822/3200 train_loss:3.4627 train_time:257093ms step_avg:141.88ms step:1823/3200 train_loss:3.3855 train_time:257233ms step_avg:141.88ms step:1824/3200 train_loss:3.5451 train_time:257373ms step_avg:141.88ms step:1825/3200 train_loss:3.6658 train_time:257513ms step_avg:141.88ms step:1826/3200 train_loss:3.6161 train_time:257654ms step_avg:141.88ms step:1827/3200 train_loss:3.5965 train_time:257794ms step_avg:141.88ms step:1828/3200 train_loss:3.4669 train_time:257934ms step_avg:141.88ms step:1829/3200 train_loss:3.4936 train_time:258074ms step_avg:141.88ms step:1830/3200 train_loss:3.6256 train_time:258215ms step_avg:141.88ms step:1831/3200 train_loss:3.4007 train_time:258356ms step_avg:141.88ms step:1832/3200 train_loss:3.5511 train_time:258497ms step_avg:141.88ms step:1833/3200 train_loss:3.4378 train_time:258639ms step_avg:141.88ms step:1834/3200 train_loss:3.7513 train_time:258780ms step_avg:141.87ms step:1835/3200 train_loss:3.5839 train_time:258920ms step_avg:141.87ms step:1836/3200 train_loss:3.5675 train_time:259062ms step_avg:141.87ms step:1837/3200 train_loss:3.6909 train_time:259204ms step_avg:141.87ms step:1838/3200 train_loss:3.5519 train_time:259344ms step_avg:141.87ms step:1839/3200 train_loss:3.4345 train_time:259486ms step_avg:141.87ms step:1840/3200 train_loss:3.5575 train_time:259625ms step_avg:141.87ms step:1841/3200 train_loss:3.4375 train_time:259765ms step_avg:141.87ms step:1842/3200 train_loss:3.5468 train_time:259906ms step_avg:141.87ms step:1843/3200 train_loss:3.6006 train_time:260046ms step_avg:141.87ms step:1844/3200 train_loss:3.3554 train_time:260187ms step_avg:141.87ms step:1845/3200 train_loss:3.4768 train_time:260326ms step_avg:141.87ms step:1846/3200 train_loss:3.5345 train_time:260466ms step_avg:141.87ms step:1847/3200 train_loss:3.4797 train_time:260606ms step_avg:141.87ms step:1848/3200 train_loss:3.3774 train_time:260748ms step_avg:141.87ms step:1849/3200 train_loss:3.6442 train_time:260887ms step_avg:141.86ms step:1850/3200 train_loss:3.4109 train_time:261027ms step_avg:141.86ms step:1851/3200 train_loss:3.4926 train_time:261177ms step_avg:141.87ms step:1852/3200 train_loss:3.4543 train_time:261307ms step_avg:141.86ms step:1853/3200 train_loss:3.6494 train_time:261446ms step_avg:141.86ms step:1854/3200 train_loss:3.6290 train_time:261587ms step_avg:141.86ms step:1855/3200 train_loss:3.5050 train_time:261727ms step_avg:141.86ms step:1856/3200 train_loss:3.4558 train_time:261867ms step_avg:141.86ms step:1857/3200 train_loss:3.4861 train_time:262007ms step_avg:141.86ms step:1858/3200 train_loss:3.7318 train_time:262147ms step_avg:141.85ms step:1859/3200 train_loss:3.5851 train_time:262286ms step_avg:141.85ms step:1860/3200 train_loss:3.5117 train_time:262427ms step_avg:141.85ms step:1861/3200 train_loss:3.5548 train_time:262566ms step_avg:141.85ms step:1862/3200 train_loss:3.4498 train_time:262706ms step_avg:141.85ms step:1863/3200 train_loss:3.4472 train_time:262845ms step_avg:141.85ms step:1864/3200 train_loss:3.5134 train_time:262986ms step_avg:141.85ms step:1865/3200 train_loss:3.5576 train_time:263127ms step_avg:141.85ms step:1866/3200 train_loss:3.3113 train_time:263266ms step_avg:141.85ms step:1867/3200 train_loss:3.4480 train_time:263406ms step_avg:141.84ms step:1868/3200 train_loss:3.4099 train_time:263547ms step_avg:141.84ms step:1869/3200 train_loss:3.4039 train_time:263687ms step_avg:141.84ms step:1870/3200 train_loss:3.5604 train_time:263826ms step_avg:141.84ms step:1871/3200 train_loss:3.5452 train_time:263967ms step_avg:141.84ms step:1872/3200 train_loss:3.4923 train_time:264108ms step_avg:141.84ms step:1873/3200 train_loss:3.5022 train_time:264247ms step_avg:141.84ms step:1874/3200 train_loss:3.4355 train_time:264387ms step_avg:141.84ms step:1875/3200 train_loss:3.5383 train_time:264527ms step_avg:141.84ms step:1875/3200 val_loss:3.5118 train_time:264583ms step_avg:141.87ms step:1876/3200 train_loss:3.5341 train_time:264682ms step_avg:141.84ms step:1877/3200 train_loss:3.4610 train_time:264823ms step_avg:141.84ms step:1878/3200 train_loss:3.5044 train_time:264964ms step_avg:141.84ms step:1879/3200 train_loss:3.6145 train_time:265104ms step_avg:141.84ms step:1880/3200 train_loss:3.4993 train_time:265243ms step_avg:141.84ms step:1881/3200 train_loss:3.5497 train_time:265384ms step_avg:141.84ms step:1882/3200 train_loss:3.4655 train_time:265521ms step_avg:141.84ms step:1883/3200 train_loss:3.5394 train_time:265665ms step_avg:141.84ms step:1884/3200 train_loss:3.5369 train_time:265808ms step_avg:141.84ms step:1885/3200 train_loss:3.2911 train_time:265950ms step_avg:141.84ms step:1886/3200 train_loss:3.6897 train_time:266089ms step_avg:141.84ms step:1887/3200 train_loss:3.4190 train_time:266227ms step_avg:141.84ms step:1888/3200 train_loss:3.4403 train_time:266367ms step_avg:141.84ms step:1889/3200 train_loss:3.5121 train_time:266507ms step_avg:141.83ms step:1890/3200 train_loss:3.5524 train_time:266811ms step_avg:141.92ms step:1891/3200 train_loss:3.3784 train_time:266948ms step_avg:141.92ms step:1892/3200 train_loss:3.6503 train_time:267088ms step_avg:141.92ms step:1893/3200 train_loss:3.4092 train_time:267226ms step_avg:141.91ms step:1894/3200 train_loss:3.5355 train_time:267364ms step_avg:141.91ms step:1895/3200 train_loss:3.5788 train_time:267503ms step_avg:141.91ms step:1896/3200 train_loss:3.3753 train_time:267642ms step_avg:141.91ms step:1897/3200 train_loss:3.5419 train_time:267788ms step_avg:141.91ms step:1898/3200 train_loss:3.5020 train_time:267933ms step_avg:141.91ms step:1899/3200 train_loss:3.5804 train_time:268073ms step_avg:141.91ms step:1900/3200 train_loss:3.3659 train_time:268379ms step_avg:142.00ms step:1901/3200 train_loss:3.6036 train_time:268517ms step_avg:142.00ms step:1902/3200 train_loss:3.4903 train_time:268656ms step_avg:142.00ms step:1903/3200 train_loss:3.6474 train_time:268794ms step_avg:141.99ms step:1904/3200 train_loss:3.4562 train_time:268933ms step_avg:141.99ms step:1905/3200 train_loss:3.7321 train_time:269071ms step_avg:141.99ms step:1906/3200 train_loss:3.4622 train_time:269209ms step_avg:141.99ms step:1907/3200 train_loss:3.4624 train_time:269353ms step_avg:141.99ms step:1908/3200 train_loss:3.5326 train_time:269496ms step_avg:141.99ms step:1909/3200 train_loss:3.4182 train_time:269636ms step_avg:141.99ms step:1910/3200 train_loss:3.4860 train_time:269776ms step_avg:141.99ms step:1911/3200 train_loss:3.5778 train_time:269916ms step_avg:141.99ms step:1912/3200 train_loss:3.5079 train_time:270054ms step_avg:141.98ms step:1913/3200 train_loss:3.3764 train_time:270194ms step_avg:141.98ms step:1914/3200 train_loss:3.2671 train_time:270334ms step_avg:141.98ms step:1915/3200 train_loss:3.4504 train_time:270475ms step_avg:141.98ms step:1916/3200 train_loss:3.6803 train_time:270615ms step_avg:141.98ms step:1917/3200 train_loss:3.6668 train_time:270756ms step_avg:141.98ms step:1918/3200 train_loss:3.6228 train_time:270896ms step_avg:141.98ms step:1919/3200 train_loss:3.4494 train_time:271037ms step_avg:141.98ms step:1920/3200 train_loss:3.6885 train_time:271176ms step_avg:141.98ms step:1921/3200 train_loss:3.5122 train_time:271318ms step_avg:141.98ms step:1922/3200 train_loss:3.4456 train_time:271457ms step_avg:141.98ms step:1923/3200 train_loss:3.6257 train_time:271599ms step_avg:141.98ms step:1924/3200 train_loss:3.5878 train_time:271739ms step_avg:141.97ms step:1925/3200 train_loss:3.4301 train_time:271880ms step_avg:141.97ms step:1926/3200 train_loss:3.4617 train_time:272026ms step_avg:141.98ms step:1927/3200 train_loss:3.3724 train_time:272162ms step_avg:141.97ms step:1928/3200 train_loss:3.4776 train_time:272302ms step_avg:141.97ms step:1929/3200 train_loss:3.3417 train_time:272443ms step_avg:141.97ms step:1930/3200 train_loss:3.4532 train_time:272583ms step_avg:141.97ms step:1931/3200 train_loss:3.5816 train_time:272724ms step_avg:141.97ms step:1932/3200 train_loss:3.4529 train_time:272865ms step_avg:141.97ms step:1933/3200 train_loss:3.5964 train_time:273005ms step_avg:141.97ms step:1934/3200 train_loss:3.4693 train_time:273146ms step_avg:141.97ms step:1935/3200 train_loss:3.5142 train_time:273286ms step_avg:141.97ms step:1936/3200 train_loss:3.5544 train_time:273426ms step_avg:141.97ms step:1937/3200 train_loss:3.5105 train_time:273569ms step_avg:141.97ms step:1938/3200 train_loss:3.5312 train_time:273708ms step_avg:141.96ms step:1939/3200 train_loss:3.4618 train_time:273849ms step_avg:141.96ms step:1940/3200 train_loss:3.5578 train_time:273989ms step_avg:141.96ms step:1941/3200 train_loss:3.5911 train_time:274127ms step_avg:141.96ms step:1942/3200 train_loss:3.4307 train_time:274268ms step_avg:141.96ms step:1943/3200 train_loss:3.4630 train_time:274408ms step_avg:141.96ms step:1944/3200 train_loss:3.5329 train_time:274549ms step_avg:141.96ms step:1945/3200 train_loss:3.3747 train_time:274689ms step_avg:141.96ms step:1946/3200 train_loss:3.6423 train_time:274829ms step_avg:141.96ms step:1947/3200 train_loss:3.5155 train_time:274969ms step_avg:141.96ms step:1948/3200 train_loss:3.4942 train_time:275110ms step_avg:141.96ms step:1949/3200 train_loss:3.4922 train_time:275249ms step_avg:141.95ms step:1950/3200 train_loss:3.3814 train_time:275392ms step_avg:141.95ms step:1951/3200 train_loss:3.4937 train_time:275530ms step_avg:141.95ms step:1952/3200 train_loss:3.3458 train_time:275671ms step_avg:141.95ms step:1953/3200 train_loss:3.5543 train_time:275810ms step_avg:141.95ms step:1954/3200 train_loss:3.5523 train_time:275950ms step_avg:141.95ms step:1955/3200 train_loss:3.5013 train_time:276092ms step_avg:141.95ms step:1956/3200 train_loss:3.3906 train_time:276231ms step_avg:141.95ms step:1957/3200 train_loss:3.4791 train_time:276374ms step_avg:141.95ms step:1958/3200 train_loss:3.6639 train_time:276510ms step_avg:141.95ms step:1959/3200 train_loss:3.5804 train_time:276650ms step_avg:141.94ms step:1960/3200 train_loss:3.6048 train_time:276790ms step_avg:141.94ms step:1961/3200 train_loss:3.4074 train_time:276931ms step_avg:141.94ms step:1962/3200 train_loss:3.5261 train_time:277070ms step_avg:141.94ms step:1963/3200 train_loss:3.5783 train_time:277210ms step_avg:141.94ms step:1964/3200 train_loss:3.5225 train_time:277351ms step_avg:141.94ms step:1965/3200 train_loss:3.4237 train_time:277492ms step_avg:141.94ms step:1966/3200 train_loss:3.8364 train_time:277632ms step_avg:141.94ms step:1967/3200 train_loss:3.4462 train_time:277773ms step_avg:141.94ms step:1968/3200 train_loss:3.4914 train_time:277912ms step_avg:141.94ms step:1969/3200 train_loss:3.5392 train_time:278053ms step_avg:141.94ms step:1970/3200 train_loss:3.4928 train_time:278193ms step_avg:141.93ms step:1971/3200 train_loss:3.3903 train_time:278333ms step_avg:141.93ms step:1972/3200 train_loss:3.3654 train_time:278472ms step_avg:141.93ms step:1973/3200 train_loss:3.4849 train_time:278612ms step_avg:141.93ms step:1974/3200 train_loss:3.4567 train_time:278754ms step_avg:141.93ms step:1975/3200 train_loss:3.4288 train_time:278892ms step_avg:141.93ms step:1976/3200 train_loss:3.5879 train_time:279032ms step_avg:141.93ms step:1977/3200 train_loss:3.4520 train_time:279171ms step_avg:141.93ms step:1978/3200 train_loss:3.8269 train_time:279311ms step_avg:141.93ms step:1979/3200 train_loss:3.5079 train_time:279451ms step_avg:141.93ms step:1980/3200 train_loss:3.5090 train_time:279591ms step_avg:141.92ms step:1981/3200 train_loss:3.5161 train_time:279731ms step_avg:141.92ms step:1982/3200 train_loss:3.5374 train_time:279872ms step_avg:141.92ms step:1983/3200 train_loss:3.4668 train_time:280012ms step_avg:141.92ms step:1984/3200 train_loss:3.4290 train_time:280152ms step_avg:141.92ms step:1985/3200 train_loss:3.4855 train_time:280293ms step_avg:141.92ms step:1986/3200 train_loss:3.5500 train_time:280432ms step_avg:141.92ms step:1987/3200 train_loss:3.5241 train_time:280576ms step_avg:141.92ms step:1988/3200 train_loss:3.4945 train_time:280715ms step_avg:141.92ms step:1989/3200 train_loss:3.5748 train_time:280853ms step_avg:141.92ms step:1990/3200 train_loss:3.6062 train_time:280993ms step_avg:141.92ms step:1991/3200 train_loss:3.3866 train_time:281134ms step_avg:141.92ms step:1992/3200 train_loss:3.3883 train_time:281273ms step_avg:141.91ms step:1993/3200 train_loss:3.5698 train_time:281414ms step_avg:141.91ms step:1994/3200 train_loss:3.3933 train_time:281554ms step_avg:141.91ms step:1995/3200 train_loss:3.4771 train_time:281695ms step_avg:141.91ms step:1996/3200 train_loss:3.5504 train_time:281835ms step_avg:141.91ms step:1997/3200 train_loss:3.4207 train_time:281974ms step_avg:141.91ms step:1998/3200 train_loss:3.5224 train_time:282114ms step_avg:141.91ms step:1999/3200 train_loss:3.5252 train_time:282254ms step_avg:141.91ms step:2000/3200 train_loss:3.4420 train_time:282394ms step_avg:141.91ms step:2000/3200 val_loss:3.4976 train_time:282450ms step_avg:141.93ms step:2001/3200 train_loss:3.5866 train_time:282546ms step_avg:141.91ms step:2002/3200 train_loss:3.5326 train_time:282692ms step_avg:141.91ms step:2003/3200 train_loss:3.6187 train_time:282832ms step_avg:141.91ms step:2004/3200 train_loss:3.5378 train_time:282971ms step_avg:141.91ms step:2005/3200 train_loss:3.5519 train_time:283110ms step_avg:141.91ms step:2006/3200 train_loss:3.4398 train_time:283249ms step_avg:141.91ms step:2007/3200 train_loss:3.4688 train_time:283389ms step_avg:141.91ms step:2008/3200 train_loss:3.5066 train_time:283533ms step_avg:141.91ms step:2009/3200 train_loss:3.5513 train_time:283677ms step_avg:141.91ms step:2010/3200 train_loss:3.4481 train_time:283817ms step_avg:141.91ms step:2011/3200 train_loss:3.5321 train_time:283957ms step_avg:141.91ms step:2012/3200 train_loss:3.5087 train_time:284097ms step_avg:141.91ms step:2013/3200 train_loss:3.5106 train_time:284236ms step_avg:141.91ms step:2014/3200 train_loss:3.4291 train_time:284376ms step_avg:141.90ms step:2015/3200 train_loss:3.4740 train_time:284516ms step_avg:141.90ms step:2016/3200 train_loss:3.4907 train_time:284658ms step_avg:141.90ms step:2017/3200 train_loss:3.6217 train_time:284800ms step_avg:141.90ms step:2018/3200 train_loss:3.4697 train_time:284940ms step_avg:141.90ms step:2019/3200 train_loss:3.6317 train_time:285080ms step_avg:141.90ms step:2020/3200 train_loss:3.6314 train_time:285220ms step_avg:141.90ms step:2021/3200 train_loss:3.3484 train_time:285360ms step_avg:141.90ms step:2022/3200 train_loss:3.5794 train_time:285501ms step_avg:141.90ms step:2023/3200 train_loss:3.4987 train_time:285641ms step_avg:141.90ms step:2024/3200 train_loss:3.5920 train_time:285782ms step_avg:141.90ms step:2025/3200 train_loss:3.6416 train_time:285923ms step_avg:141.90ms step:2026/3200 train_loss:3.4204 train_time:286063ms step_avg:141.90ms step:2027/3200 train_loss:3.4618 train_time:286202ms step_avg:141.90ms step:2028/3200 train_loss:3.3680 train_time:286342ms step_avg:141.89ms step:2029/3200 train_loss:3.4822 train_time:286483ms step_avg:141.89ms step:2030/3200 train_loss:3.3996 train_time:286623ms step_avg:141.89ms step:2031/3200 train_loss:3.4892 train_time:286763ms step_avg:141.89ms step:2032/3200 train_loss:3.4832 train_time:286904ms step_avg:141.89ms step:2033/3200 train_loss:3.5016 train_time:287047ms step_avg:141.89ms step:2034/3200 train_loss:3.3936 train_time:287185ms step_avg:141.89ms step:2035/3200 train_loss:3.5553 train_time:287325ms step_avg:141.89ms step:2036/3200 train_loss:3.5525 train_time:287465ms step_avg:141.89ms step:2037/3200 train_loss:3.5416 train_time:287606ms step_avg:141.89ms step:2038/3200 train_loss:3.4167 train_time:287748ms step_avg:141.89ms step:2039/3200 train_loss:3.6771 train_time:287889ms step_avg:141.89ms step:2040/3200 train_loss:3.5075 train_time:288031ms step_avg:141.89ms step:2041/3200 train_loss:3.5275 train_time:288172ms step_avg:141.89ms step:2042/3200 train_loss:3.4773 train_time:288313ms step_avg:141.89ms step:2043/3200 train_loss:3.3744 train_time:288454ms step_avg:141.89ms step:2044/3200 train_loss:3.4993 train_time:288595ms step_avg:141.89ms step:2045/3200 train_loss:3.5025 train_time:288734ms step_avg:141.88ms step:2046/3200 train_loss:3.3627 train_time:288875ms step_avg:141.88ms step:2047/3200 train_loss:3.4472 train_time:289016ms step_avg:141.88ms step:2048/3200 train_loss:3.5147 train_time:289156ms step_avg:141.88ms step:2049/3200 train_loss:3.4649 train_time:289297ms step_avg:141.88ms step:2050/3200 train_loss:3.5165 train_time:289436ms step_avg:141.88ms step:2051/3200 train_loss:3.6565 train_time:289576ms step_avg:141.88ms step:2052/3200 train_loss:3.5241 train_time:289715ms step_avg:141.88ms step:2053/3200 train_loss:3.4773 train_time:289855ms step_avg:141.88ms step:2054/3200 train_loss:3.4524 train_time:289996ms step_avg:141.88ms step:2055/3200 train_loss:3.3272 train_time:290136ms step_avg:141.88ms step:2056/3200 train_loss:3.4349 train_time:290283ms step_avg:141.88ms step:2057/3200 train_loss:3.6086 train_time:290416ms step_avg:141.87ms step:2058/3200 train_loss:3.6308 train_time:290557ms step_avg:141.87ms step:2059/3200 train_loss:3.4945 train_time:290697ms step_avg:141.87ms step:2060/3200 train_loss:3.5341 train_time:290837ms step_avg:141.87ms step:2061/3200 train_loss:3.5204 train_time:290977ms step_avg:141.87ms step:2062/3200 train_loss:3.4684 train_time:291117ms step_avg:141.87ms step:2063/3200 train_loss:3.3876 train_time:291257ms step_avg:141.87ms step:2064/3200 train_loss:3.6948 train_time:291398ms step_avg:141.87ms step:2065/3200 train_loss:3.5580 train_time:291537ms step_avg:141.87ms step:2066/3200 train_loss:3.5030 train_time:291677ms step_avg:141.87ms step:2067/3200 train_loss:3.5515 train_time:291816ms step_avg:141.86ms step:2068/3200 train_loss:3.4536 train_time:291957ms step_avg:141.86ms step:2069/3200 train_loss:3.5049 train_time:292097ms step_avg:141.86ms step:2070/3200 train_loss:3.6350 train_time:292237ms step_avg:141.86ms step:2071/3200 train_loss:3.6420 train_time:292378ms step_avg:141.86ms step:2072/3200 train_loss:3.4951 train_time:292518ms step_avg:141.86ms step:2073/3200 train_loss:3.5243 train_time:292657ms step_avg:141.86ms step:2074/3200 train_loss:3.4177 train_time:292797ms step_avg:141.86ms step:2075/3200 train_loss:3.9433 train_time:292937ms step_avg:141.86ms step:2076/3200 train_loss:3.3709 train_time:293078ms step_avg:141.86ms step:2077/3200 train_loss:3.5414 train_time:293217ms step_avg:141.86ms step:2078/3200 train_loss:3.4276 train_time:293358ms step_avg:141.86ms step:2079/3200 train_loss:3.4029 train_time:293663ms step_avg:141.93ms step:2080/3200 train_loss:3.4985 train_time:293798ms step_avg:141.93ms step:2081/3200 train_loss:3.7614 train_time:293938ms step_avg:141.93ms step:2082/3200 train_loss:3.3742 train_time:294077ms step_avg:141.93ms step:2083/3200 train_loss:3.7096 train_time:294214ms step_avg:141.93ms step:2084/3200 train_loss:3.4171 train_time:294353ms step_avg:141.93ms step:2085/3200 train_loss:3.4056 train_time:294491ms step_avg:141.92ms step:2086/3200 train_loss:3.6462 train_time:294638ms step_avg:141.93ms step:2087/3200 train_loss:3.5719 train_time:294781ms step_avg:141.93ms step:2088/3200 train_loss:3.5590 train_time:294920ms step_avg:141.92ms step:2089/3200 train_loss:3.6218 train_time:295059ms step_avg:141.92ms step:2090/3200 train_loss:3.5431 train_time:295362ms step_avg:142.00ms step:2091/3200 train_loss:3.5323 train_time:295501ms step_avg:142.00ms step:2092/3200 train_loss:3.4882 train_time:295650ms step_avg:142.00ms step:2093/3200 train_loss:3.5587 train_time:295779ms step_avg:142.00ms step:2094/3200 train_loss:3.4625 train_time:295917ms step_avg:141.99ms step:2095/3200 train_loss:3.2538 train_time:296056ms step_avg:141.99ms step:2096/3200 train_loss:3.4784 train_time:296194ms step_avg:141.99ms step:2097/3200 train_loss:3.6538 train_time:296338ms step_avg:141.99ms step:2098/3200 train_loss:3.4770 train_time:296483ms step_avg:141.99ms step:2099/3200 train_loss:3.3710 train_time:296624ms step_avg:141.99ms step:2100/3200 train_loss:3.4700 train_time:296763ms step_avg:141.99ms step:2101/3200 train_loss:3.4312 train_time:296902ms step_avg:141.99ms step:2102/3200 train_loss:3.5610 train_time:297040ms step_avg:141.99ms step:2103/3200 train_loss:3.4040 train_time:297181ms step_avg:141.99ms step:2104/3200 train_loss:3.3688 train_time:297322ms step_avg:141.99ms step:2105/3200 train_loss:3.6286 train_time:297463ms step_avg:141.99ms step:2106/3200 train_loss:3.3617 train_time:297603ms step_avg:141.99ms step:2107/3200 train_loss:3.7583 train_time:297743ms step_avg:141.99ms step:2108/3200 train_loss:3.5930 train_time:297885ms step_avg:141.99ms step:2109/3200 train_loss:3.4940 train_time:298024ms step_avg:141.98ms step:2110/3200 train_loss:3.5153 train_time:298163ms step_avg:141.98ms step:2111/3200 train_loss:3.3391 train_time:298304ms step_avg:141.98ms step:2112/3200 train_loss:3.8293 train_time:298445ms step_avg:141.98ms step:2113/3200 train_loss:3.5202 train_time:298589ms step_avg:141.98ms step:2114/3200 train_loss:3.4367 train_time:298731ms step_avg:141.98ms step:2115/3200 train_loss:3.5583 train_time:298871ms step_avg:141.98ms step:2116/3200 train_loss:3.5166 train_time:299015ms step_avg:141.98ms step:2117/3200 train_loss:3.5009 train_time:299151ms step_avg:141.98ms step:2118/3200 train_loss:3.5570 train_time:299292ms step_avg:141.98ms step:2119/3200 train_loss:3.4151 train_time:299433ms step_avg:141.98ms step:2120/3200 train_loss:3.4702 train_time:299574ms step_avg:141.98ms step:2121/3200 train_loss:3.1763 train_time:299714ms step_avg:141.98ms step:2122/3200 train_loss:3.3780 train_time:299855ms step_avg:141.98ms step:2123/3200 train_loss:3.5380 train_time:299995ms step_avg:141.98ms step:2124/3200 train_loss:3.4532 train_time:300135ms step_avg:141.97ms step:2125/3200 train_loss:3.6140 train_time:300274ms step_avg:141.97ms step:2125/3200 val_loss:3.4870 train_time:300331ms step_avg:142.00ms step:2126/3200 train_loss:3.4724 train_time:300422ms step_avg:141.98ms step:2127/3200 train_loss:3.5902 train_time:300571ms step_avg:141.98ms step:2128/3200 train_loss:3.5699 train_time:300712ms step_avg:141.98ms step:2129/3200 train_loss:3.4325 train_time:300850ms step_avg:141.98ms step:2130/3200 train_loss:3.4156 train_time:300989ms step_avg:141.98ms step:2131/3200 train_loss:3.4381 train_time:301127ms step_avg:141.97ms step:2132/3200 train_loss:3.5879 train_time:301268ms step_avg:141.97ms step:2133/3200 train_loss:3.4679 train_time:301413ms step_avg:141.98ms step:2134/3200 train_loss:3.3813 train_time:301557ms step_avg:141.98ms step:2135/3200 train_loss:3.4478 train_time:301697ms step_avg:141.98ms step:2136/3200 train_loss:3.5678 train_time:301836ms step_avg:141.97ms step:2137/3200 train_loss:3.5793 train_time:301975ms step_avg:141.97ms step:2138/3200 train_loss:3.5253 train_time:302114ms step_avg:141.97ms step:2139/3200 train_loss:3.5131 train_time:302254ms step_avg:141.97ms step:2140/3200 train_loss:3.5018 train_time:302395ms step_avg:141.97ms step:2141/3200 train_loss:3.5846 train_time:302537ms step_avg:141.97ms step:2142/3200 train_loss:3.8846 train_time:302679ms step_avg:141.97ms step:2143/3200 train_loss:3.4167 train_time:302820ms step_avg:141.97ms step:2144/3200 train_loss:3.4478 train_time:302963ms step_avg:141.97ms step:2145/3200 train_loss:3.4873 train_time:303099ms step_avg:141.97ms step:2146/3200 train_loss:3.6130 train_time:303239ms step_avg:141.97ms step:2147/3200 train_loss:3.5430 train_time:303379ms step_avg:141.97ms step:2148/3200 train_loss:3.9530 train_time:303521ms step_avg:141.96ms step:2149/3200 train_loss:3.4715 train_time:303663ms step_avg:141.96ms step:2150/3200 train_loss:3.4391 train_time:303804ms step_avg:141.96ms step:2151/3200 train_loss:3.5104 train_time:303944ms step_avg:141.96ms step:2152/3200 train_loss:3.5387 train_time:304084ms step_avg:141.96ms step:2153/3200 train_loss:3.4959 train_time:304224ms step_avg:141.96ms step:2154/3200 train_loss:3.4343 train_time:304366ms step_avg:141.96ms step:2155/3200 train_loss:3.6415 train_time:304505ms step_avg:141.96ms step:2156/3200 train_loss:3.2615 train_time:304647ms step_avg:141.96ms step:2157/3200 train_loss:3.4256 train_time:304790ms step_avg:141.96ms step:2158/3200 train_loss:3.5559 train_time:304930ms step_avg:141.96ms step:2159/3200 train_loss:3.4945 train_time:305070ms step_avg:141.96ms step:2160/3200 train_loss:3.6541 train_time:305210ms step_avg:141.96ms step:2161/3200 train_loss:3.5702 train_time:305350ms step_avg:141.96ms step:2162/3200 train_loss:3.4926 train_time:305490ms step_avg:141.96ms step:2163/3200 train_loss:3.4720 train_time:305630ms step_avg:141.96ms step:2164/3200 train_loss:3.4597 train_time:305773ms step_avg:141.96ms step:2165/3200 train_loss:3.5433 train_time:305916ms step_avg:141.96ms step:2166/3200 train_loss:3.5671 train_time:306053ms step_avg:141.95ms step:2167/3200 train_loss:3.4926 train_time:306192ms step_avg:141.95ms step:2168/3200 train_loss:3.3969 train_time:306333ms step_avg:141.95ms step:2169/3200 train_loss:3.4795 train_time:306472ms step_avg:141.95ms step:2170/3200 train_loss:3.5207 train_time:306613ms step_avg:141.95ms step:2171/3200 train_loss:3.6411 train_time:306754ms step_avg:141.95ms step:2172/3200 train_loss:3.4438 train_time:306894ms step_avg:141.95ms step:2173/3200 train_loss:3.4244 train_time:307033ms step_avg:141.95ms step:2174/3200 train_loss:3.4354 train_time:307172ms step_avg:141.95ms step:2175/3200 train_loss:3.4848 train_time:307314ms step_avg:141.95ms step:2176/3200 train_loss:3.4513 train_time:307453ms step_avg:141.94ms step:2177/3200 train_loss:3.4202 train_time:307592ms step_avg:141.94ms step:2178/3200 train_loss:3.6423 train_time:307733ms step_avg:141.94ms step:2179/3200 train_loss:3.4650 train_time:307872ms step_avg:141.94ms step:2180/3200 train_loss:3.4757 train_time:308013ms step_avg:141.94ms step:2181/3200 train_loss:3.5319 train_time:308153ms step_avg:141.94ms step:2182/3200 train_loss:3.5113 train_time:308293ms step_avg:141.94ms step:2183/3200 train_loss:3.4799 train_time:308433ms step_avg:141.94ms step:2184/3200 train_loss:3.3769 train_time:308573ms step_avg:141.94ms step:2185/3200 train_loss:3.5561 train_time:308713ms step_avg:141.94ms step:2186/3200 train_loss:3.7201 train_time:308853ms step_avg:141.94ms step:2187/3200 train_loss:3.3652 train_time:308993ms step_avg:141.94ms step:2188/3200 train_loss:3.4085 train_time:309132ms step_avg:141.93ms step:2189/3200 train_loss:3.2597 train_time:309271ms step_avg:141.93ms step:2190/3200 train_loss:3.4045 train_time:309412ms step_avg:141.93ms step:2191/3200 train_loss:3.5522 train_time:309552ms step_avg:141.93ms step:2192/3200 train_loss:3.4900 train_time:309691ms step_avg:141.93ms step:2193/3200 train_loss:3.7214 train_time:309831ms step_avg:141.93ms step:2194/3200 train_loss:3.4839 train_time:309972ms step_avg:141.93ms step:2195/3200 train_loss:3.5505 train_time:310111ms step_avg:141.93ms step:2196/3200 train_loss:3.4897 train_time:310252ms step_avg:141.93ms step:2197/3200 train_loss:3.4093 train_time:310391ms step_avg:141.93ms step:2198/3200 train_loss:3.4948 train_time:310532ms step_avg:141.92ms step:2199/3200 train_loss:3.4332 train_time:310671ms step_avg:141.92ms step:2200/3200 train_loss:3.4343 train_time:310811ms step_avg:141.92ms step:2201/3200 train_loss:3.4932 train_time:310952ms step_avg:141.92ms step:2202/3200 train_loss:3.4728 train_time:311092ms step_avg:141.92ms step:2203/3200 train_loss:3.4525 train_time:311231ms step_avg:141.92ms step:2204/3200 train_loss:3.9516 train_time:311370ms step_avg:141.92ms step:2205/3200 train_loss:3.3628 train_time:311511ms step_avg:141.92ms step:2206/3200 train_loss:3.4888 train_time:311651ms step_avg:141.92ms step:2207/3200 train_loss:3.5020 train_time:311793ms step_avg:141.92ms step:2208/3200 train_loss:3.5190 train_time:311933ms step_avg:141.92ms step:2209/3200 train_loss:3.4153 train_time:312072ms step_avg:141.92ms step:2210/3200 train_loss:3.4926 train_time:312213ms step_avg:141.91ms step:2211/3200 train_loss:3.5034 train_time:312353ms step_avg:141.91ms step:2212/3200 train_loss:3.4956 train_time:312492ms step_avg:141.91ms step:2213/3200 train_loss:3.5217 train_time:312632ms step_avg:141.91ms step:2214/3200 train_loss:3.3814 train_time:312772ms step_avg:141.91ms step:2215/3200 train_loss:3.4516 train_time:312913ms step_avg:141.91ms step:2216/3200 train_loss:3.5846 train_time:313053ms step_avg:141.91ms step:2217/3200 train_loss:3.5392 train_time:313193ms step_avg:141.91ms step:2218/3200 train_loss:3.5007 train_time:313333ms step_avg:141.91ms step:2219/3200 train_loss:3.5046 train_time:313472ms step_avg:141.91ms step:2220/3200 train_loss:3.4141 train_time:313612ms step_avg:141.91ms step:2221/3200 train_loss:3.6711 train_time:313752ms step_avg:141.91ms step:2222/3200 train_loss:3.5596 train_time:313892ms step_avg:141.90ms step:2223/3200 train_loss:3.5816 train_time:314034ms step_avg:141.90ms step:2224/3200 train_loss:3.4647 train_time:314173ms step_avg:141.90ms step:2225/3200 train_loss:3.5916 train_time:314313ms step_avg:141.90ms step:2226/3200 train_loss:3.3462 train_time:314453ms step_avg:141.90ms step:2227/3200 train_loss:3.6171 train_time:314592ms step_avg:141.90ms step:2228/3200 train_loss:3.5520 train_time:314733ms step_avg:141.90ms step:2229/3200 train_loss:3.3518 train_time:314872ms step_avg:141.90ms step:2230/3200 train_loss:3.6968 train_time:315012ms step_avg:141.90ms step:2231/3200 train_loss:3.3923 train_time:315152ms step_avg:141.90ms step:2232/3200 train_loss:3.8580 train_time:315291ms step_avg:141.90ms step:2233/3200 train_loss:3.5500 train_time:315431ms step_avg:141.89ms step:2234/3200 train_loss:3.4931 train_time:315570ms step_avg:141.89ms step:2235/3200 train_loss:3.5242 train_time:315711ms step_avg:141.89ms step:2236/3200 train_loss:3.3117 train_time:315854ms step_avg:141.89ms step:2237/3200 train_loss:3.3113 train_time:315991ms step_avg:141.89ms step:2238/3200 train_loss:3.5340 train_time:316132ms step_avg:141.89ms step:2239/3200 train_loss:3.6283 train_time:316273ms step_avg:141.89ms step:2240/3200 train_loss:3.3501 train_time:316412ms step_avg:141.89ms step:2241/3200 train_loss:3.4170 train_time:316552ms step_avg:141.89ms step:2242/3200 train_loss:3.5966 train_time:316691ms step_avg:141.89ms step:2243/3200 train_loss:3.5653 train_time:316832ms step_avg:141.89ms step:2244/3200 train_loss:3.4223 train_time:316971ms step_avg:141.89ms step:2245/3200 train_loss:3.4972 train_time:317111ms step_avg:141.88ms step:2246/3200 train_loss:3.5159 train_time:317251ms step_avg:141.88ms step:2247/3200 train_loss:3.3458 train_time:317392ms step_avg:141.88ms step:2248/3200 train_loss:3.3624 train_time:317532ms step_avg:141.88ms step:2249/3200 train_loss:3.6161 train_time:317671ms step_avg:141.88ms step:2250/3200 train_loss:3.3527 train_time:317811ms step_avg:141.88ms step:2250/3200 val_loss:3.4763 train_time:317868ms step_avg:141.91ms step:2251/3200 train_loss:3.3508 train_time:317964ms step_avg:141.88ms step:2252/3200 train_loss:3.4258 train_time:318106ms step_avg:141.88ms step:2253/3200 train_loss:3.4041 train_time:318245ms step_avg:141.88ms step:2254/3200 train_loss:3.4459 train_time:318385ms step_avg:141.88ms step:2255/3200 train_loss:3.4995 train_time:318525ms step_avg:141.88ms step:2256/3200 train_loss:3.3782 train_time:318664ms step_avg:141.88ms step:2257/3200 train_loss:3.6656 train_time:318804ms step_avg:141.88ms step:2258/3200 train_loss:3.5463 train_time:318947ms step_avg:141.88ms step:2259/3200 train_loss:3.8535 train_time:319091ms step_avg:141.88ms step:2260/3200 train_loss:3.5434 train_time:319231ms step_avg:141.88ms step:2261/3200 train_loss:3.5941 train_time:319370ms step_avg:141.88ms step:2262/3200 train_loss:3.5033 train_time:319511ms step_avg:141.88ms step:2263/3200 train_loss:3.5100 train_time:319651ms step_avg:141.88ms step:2264/3200 train_loss:3.2678 train_time:319790ms step_avg:141.88ms step:2265/3200 train_loss:3.3940 train_time:319931ms step_avg:141.88ms step:2266/3200 train_loss:3.6040 train_time:320074ms step_avg:141.88ms step:2267/3200 train_loss:3.3371 train_time:320215ms step_avg:141.88ms step:2268/3200 train_loss:3.4145 train_time:320524ms step_avg:141.95ms step:2269/3200 train_loss:3.3900 train_time:320656ms step_avg:141.95ms step:2270/3200 train_loss:3.3541 train_time:320795ms step_avg:141.94ms step:2271/3200 train_loss:3.7540 train_time:320933ms step_avg:141.94ms step:2272/3200 train_loss:3.4116 train_time:321071ms step_avg:141.94ms step:2273/3200 train_loss:3.4169 train_time:321210ms step_avg:141.94ms step:2274/3200 train_loss:3.5000 train_time:321348ms step_avg:141.94ms step:2275/3200 train_loss:3.4512 train_time:321493ms step_avg:141.94ms step:2276/3200 train_loss:3.4618 train_time:321637ms step_avg:141.94ms step:2277/3200 train_loss:3.3431 train_time:321777ms step_avg:141.94ms step:2278/3200 train_loss:3.4469 train_time:321917ms step_avg:141.94ms step:2279/3200 train_loss:3.5788 train_time:322056ms step_avg:141.94ms step:2280/3200 train_loss:3.3804 train_time:322358ms step_avg:142.01ms step:2281/3200 train_loss:3.4391 train_time:322497ms step_avg:142.01ms step:2282/3200 train_loss:3.4519 train_time:322636ms step_avg:142.01ms step:2283/3200 train_loss:3.5891 train_time:322775ms step_avg:142.00ms step:2284/3200 train_loss:3.4672 train_time:322914ms step_avg:142.00ms step:2285/3200 train_loss:3.4897 train_time:323052ms step_avg:142.00ms step:2286/3200 train_loss:3.4878 train_time:323192ms step_avg:142.00ms step:2287/3200 train_loss:3.4854 train_time:323337ms step_avg:142.00ms step:2288/3200 train_loss:3.4392 train_time:323478ms step_avg:142.00ms step:2289/3200 train_loss:3.5747 train_time:323618ms step_avg:142.00ms step:2290/3200 train_loss:3.5468 train_time:323761ms step_avg:142.00ms step:2291/3200 train_loss:3.4313 train_time:323898ms step_avg:142.00ms step:2292/3200 train_loss:3.7690 train_time:324037ms step_avg:142.00ms step:2293/3200 train_loss:3.4298 train_time:324177ms step_avg:142.00ms step:2294/3200 train_loss:3.3764 train_time:324318ms step_avg:142.00ms step:2295/3200 train_loss:3.5586 train_time:324459ms step_avg:142.00ms step:2296/3200 train_loss:3.5081 train_time:324599ms step_avg:141.99ms step:2297/3200 train_loss:3.4788 train_time:324740ms step_avg:141.99ms step:2298/3200 train_loss:3.8575 train_time:324880ms step_avg:141.99ms step:2299/3200 train_loss:3.3759 train_time:325020ms step_avg:141.99ms step:2300/3200 train_loss:3.3783 train_time:325159ms step_avg:141.99ms step:2301/3200 train_loss:3.7126 train_time:325299ms step_avg:141.99ms step:2302/3200 train_loss:3.4443 train_time:325439ms step_avg:141.99ms step:2303/3200 train_loss:3.4548 train_time:325579ms step_avg:141.99ms step:2304/3200 train_loss:3.4456 train_time:325720ms step_avg:141.99ms step:2305/3200 train_loss:3.3806 train_time:325861ms step_avg:141.99ms step:2306/3200 train_loss:3.5390 train_time:326001ms step_avg:141.99ms step:2307/3200 train_loss:3.3983 train_time:326141ms step_avg:141.99ms step:2308/3200 train_loss:3.4156 train_time:326279ms step_avg:141.98ms step:2309/3200 train_loss:3.5465 train_time:326420ms step_avg:141.98ms step:2310/3200 train_loss:3.5093 train_time:326561ms step_avg:141.98ms step:2311/3200 train_loss:3.3755 train_time:326702ms step_avg:141.98ms step:2312/3200 train_loss:3.4852 train_time:326843ms step_avg:141.98ms step:2313/3200 train_loss:3.6172 train_time:326984ms step_avg:141.98ms step:2314/3200 train_loss:3.4328 train_time:327123ms step_avg:141.98ms step:2315/3200 train_loss:3.3624 train_time:327264ms step_avg:141.98ms step:2316/3200 train_loss:3.4501 train_time:327404ms step_avg:141.98ms step:2317/3200 train_loss:3.3389 train_time:327545ms step_avg:141.98ms step:2318/3200 train_loss:3.4440 train_time:327687ms step_avg:141.98ms step:2319/3200 train_loss:3.4606 train_time:327828ms step_avg:141.98ms step:2320/3200 train_loss:3.3035 train_time:327970ms step_avg:141.98ms step:2321/3200 train_loss:3.4451 train_time:328110ms step_avg:141.98ms step:2322/3200 train_loss:3.4943 train_time:328249ms step_avg:141.98ms step:2323/3200 train_loss:3.4050 train_time:328390ms step_avg:141.98ms step:2324/3200 train_loss:3.4569 train_time:328531ms step_avg:141.98ms step:2325/3200 train_loss:3.3748 train_time:328671ms step_avg:141.97ms step:2326/3200 train_loss:3.5181 train_time:328813ms step_avg:141.97ms step:2327/3200 train_loss:3.5216 train_time:328952ms step_avg:141.97ms step:2328/3200 train_loss:3.3007 train_time:329091ms step_avg:141.97ms step:2329/3200 train_loss:3.4083 train_time:329231ms step_avg:141.97ms step:2330/3200 train_loss:3.4349 train_time:329371ms step_avg:141.97ms step:2331/3200 train_loss:3.4060 train_time:329512ms step_avg:141.97ms step:2332/3200 train_loss:3.5792 train_time:329652ms step_avg:141.97ms step:2333/3200 train_loss:3.4674 train_time:329792ms step_avg:141.97ms step:2334/3200 train_loss:3.4429 train_time:329936ms step_avg:141.97ms step:2335/3200 train_loss:3.5259 train_time:330073ms step_avg:141.97ms step:2336/3200 train_loss:3.3731 train_time:330213ms step_avg:141.97ms step:2337/3200 train_loss:3.5207 train_time:330353ms step_avg:141.97ms step:2338/3200 train_loss:3.4716 train_time:330493ms step_avg:141.96ms step:2339/3200 train_loss:3.4295 train_time:330633ms step_avg:141.96ms step:2340/3200 train_loss:3.5072 train_time:330772ms step_avg:141.96ms step:2341/3200 train_loss:3.5483 train_time:330913ms step_avg:141.96ms step:2342/3200 train_loss:3.4230 train_time:331053ms step_avg:141.96ms step:2343/3200 train_loss:3.4322 train_time:331194ms step_avg:141.96ms step:2344/3200 train_loss:3.4945 train_time:331334ms step_avg:141.96ms step:2345/3200 train_loss:3.4414 train_time:331474ms step_avg:141.96ms step:2346/3200 train_loss:3.5661 train_time:331614ms step_avg:141.96ms step:2347/3200 train_loss:3.4674 train_time:331754ms step_avg:141.96ms step:2348/3200 train_loss:3.5704 train_time:331894ms step_avg:141.96ms step:2349/3200 train_loss:3.5324 train_time:332034ms step_avg:141.96ms step:2350/3200 train_loss:3.5756 train_time:332176ms step_avg:141.96ms step:2351/3200 train_loss:3.2691 train_time:332313ms step_avg:141.95ms step:2352/3200 train_loss:3.3815 train_time:332453ms step_avg:141.95ms step:2353/3200 train_loss:3.3794 train_time:332594ms step_avg:141.95ms step:2354/3200 train_loss:3.5981 train_time:332735ms step_avg:141.95ms step:2355/3200 train_loss:3.3972 train_time:332875ms step_avg:141.95ms step:2356/3200 train_loss:3.3834 train_time:333015ms step_avg:141.95ms step:2357/3200 train_loss:3.5381 train_time:333155ms step_avg:141.95ms step:2358/3200 train_loss:3.4042 train_time:333296ms step_avg:141.95ms step:2359/3200 train_loss:3.5000 train_time:333436ms step_avg:141.95ms step:2360/3200 train_loss:3.3917 train_time:333577ms step_avg:141.95ms step:2361/3200 train_loss:3.4089 train_time:333716ms step_avg:141.95ms step:2362/3200 train_loss:3.4397 train_time:333855ms step_avg:141.95ms step:2363/3200 train_loss:3.5042 train_time:333995ms step_avg:141.94ms step:2364/3200 train_loss:3.4433 train_time:334134ms step_avg:141.94ms step:2365/3200 train_loss:3.8838 train_time:334274ms step_avg:141.94ms step:2366/3200 train_loss:3.5118 train_time:334416ms step_avg:141.94ms step:2367/3200 train_loss:3.6577 train_time:334553ms step_avg:141.94ms step:2368/3200 train_loss:3.4749 train_time:334694ms step_avg:141.94ms step:2369/3200 train_loss:3.4819 train_time:334834ms step_avg:141.94ms step:2370/3200 train_loss:3.5112 train_time:334974ms step_avg:141.94ms step:2371/3200 train_loss:3.3969 train_time:335114ms step_avg:141.94ms step:2372/3200 train_loss:3.6224 train_time:335254ms step_avg:141.94ms step:2373/3200 train_loss:3.4687 train_time:335394ms step_avg:141.94ms step:2374/3200 train_loss:4.0280 train_time:335534ms step_avg:141.93ms step:2375/3200 train_loss:3.4508 train_time:335674ms step_avg:141.93ms step:2375/3200 val_loss:3.4595 train_time:335730ms step_avg:141.96ms step:2376/3200 train_loss:3.3607 train_time:335827ms step_avg:141.94ms step:2377/3200 train_loss:3.5234 train_time:335969ms step_avg:141.94ms step:2378/3200 train_loss:3.4859 train_time:336110ms step_avg:141.94ms step:2379/3200 train_loss:3.5035 train_time:336251ms step_avg:141.94ms step:2380/3200 train_loss:3.4871 train_time:336390ms step_avg:141.94ms step:2381/3200 train_loss:3.3868 train_time:336529ms step_avg:141.94ms step:2382/3200 train_loss:3.4787 train_time:336669ms step_avg:141.93ms step:2383/3200 train_loss:3.5009 train_time:336812ms step_avg:141.94ms step:2384/3200 train_loss:3.4479 train_time:336957ms step_avg:141.94ms step:2385/3200 train_loss:3.3781 train_time:337097ms step_avg:141.94ms step:2386/3200 train_loss:3.4869 train_time:337237ms step_avg:141.93ms step:2387/3200 train_loss:3.4463 train_time:337381ms step_avg:141.94ms step:2388/3200 train_loss:3.4529 train_time:337516ms step_avg:141.93ms step:2389/3200 train_loss:3.4822 train_time:337655ms step_avg:141.93ms step:2390/3200 train_loss:3.4655 train_time:337796ms step_avg:141.93ms step:2391/3200 train_loss:3.4633 train_time:337939ms step_avg:141.93ms step:2392/3200 train_loss:3.3424 train_time:338080ms step_avg:141.93ms step:2393/3200 train_loss:3.5646 train_time:338220ms step_avg:141.93ms step:2394/3200 train_loss:3.3981 train_time:338361ms step_avg:141.93ms step:2395/3200 train_loss:3.4993 train_time:338501ms step_avg:141.93ms step:2396/3200 train_loss:3.6233 train_time:338640ms step_avg:141.93ms step:2397/3200 train_loss:3.6228 train_time:338781ms step_avg:141.93ms step:2398/3200 train_loss:3.5797 train_time:338920ms step_avg:141.93ms step:2399/3200 train_loss:3.5445 train_time:339060ms step_avg:141.93ms step:2400/3200 train_loss:3.4206 train_time:339201ms step_avg:141.92ms step:2401/3200 train_loss:3.4166 train_time:339340ms step_avg:141.92ms step:2402/3200 train_loss:3.5235 train_time:339480ms step_avg:141.92ms step:2403/3200 train_loss:3.3623 train_time:339619ms step_avg:141.92ms step:2404/3200 train_loss:3.4976 train_time:339759ms step_avg:141.92ms step:2405/3200 train_loss:3.7056 train_time:339899ms step_avg:141.92ms step:2406/3200 train_loss:3.4311 train_time:340038ms step_avg:141.92ms step:2407/3200 train_loss:3.5850 train_time:340182ms step_avg:141.92ms step:2408/3200 train_loss:3.4380 train_time:340319ms step_avg:141.92ms step:2409/3200 train_loss:3.3725 train_time:340459ms step_avg:141.92ms step:2410/3200 train_loss:3.5087 train_time:340599ms step_avg:141.92ms step:2411/3200 train_loss:3.2968 train_time:340739ms step_avg:141.92ms step:2412/3200 train_loss:3.7347 train_time:340879ms step_avg:141.91ms step:2413/3200 train_loss:3.4151 train_time:341020ms step_avg:141.91ms step:2414/3200 train_loss:3.4916 train_time:341160ms step_avg:141.91ms step:2415/3200 train_loss:3.4150 train_time:341300ms step_avg:141.91ms step:2416/3200 train_loss:3.4858 train_time:341439ms step_avg:141.91ms step:2417/3200 train_loss:3.3095 train_time:341579ms step_avg:141.91ms step:2418/3200 train_loss:3.2290 train_time:341719ms step_avg:141.91ms step:2419/3200 train_loss:3.5294 train_time:341859ms step_avg:141.91ms step:2420/3200 train_loss:3.4042 train_time:342004ms step_avg:141.91ms step:2421/3200 train_loss:3.4348 train_time:342139ms step_avg:141.91ms step:2422/3200 train_loss:3.5410 train_time:342280ms step_avg:141.91ms step:2423/3200 train_loss:3.5843 train_time:342420ms step_avg:141.91ms step:2424/3200 train_loss:3.4030 train_time:342560ms step_avg:141.91ms step:2425/3200 train_loss:3.4947 train_time:342700ms step_avg:141.90ms step:2426/3200 train_loss:3.4956 train_time:342840ms step_avg:141.90ms step:2427/3200 train_loss:3.4196 train_time:342980ms step_avg:141.90ms step:2428/3200 train_loss:3.3684 train_time:343121ms step_avg:141.90ms step:2429/3200 train_loss:3.5002 train_time:343262ms step_avg:141.90ms step:2430/3200 train_loss:3.3960 train_time:343402ms step_avg:141.90ms step:2431/3200 train_loss:3.4495 train_time:343542ms step_avg:141.90ms step:2432/3200 train_loss:3.5068 train_time:343682ms step_avg:141.90ms step:2433/3200 train_loss:3.4750 train_time:343822ms step_avg:141.90ms step:2434/3200 train_loss:3.3480 train_time:343961ms step_avg:141.90ms step:2435/3200 train_loss:3.3162 train_time:344101ms step_avg:141.90ms step:2436/3200 train_loss:3.4772 train_time:344243ms step_avg:141.90ms step:2437/3200 train_loss:3.3281 train_time:344382ms step_avg:141.90ms step:2438/3200 train_loss:3.4034 train_time:344524ms step_avg:141.90ms step:2439/3200 train_loss:3.5010 train_time:344665ms step_avg:141.90ms step:2440/3200 train_loss:3.4236 train_time:344806ms step_avg:141.90ms step:2441/3200 train_loss:3.5082 train_time:344945ms step_avg:141.89ms step:2442/3200 train_loss:3.3968 train_time:345086ms step_avg:141.89ms step:2443/3200 train_loss:3.4470 train_time:345226ms step_avg:141.89ms step:2444/3200 train_loss:3.3343 train_time:345369ms step_avg:141.89ms step:2445/3200 train_loss:3.3441 train_time:345507ms step_avg:141.89ms step:2446/3200 train_loss:3.5106 train_time:345648ms step_avg:141.89ms step:2447/3200 train_loss:3.3746 train_time:345789ms step_avg:141.89ms step:2448/3200 train_loss:3.4383 train_time:345930ms step_avg:141.89ms step:2449/3200 train_loss:3.6225 train_time:346070ms step_avg:141.89ms step:2450/3200 train_loss:3.4311 train_time:346211ms step_avg:141.89ms step:2451/3200 train_loss:3.5063 train_time:346354ms step_avg:141.89ms step:2452/3200 train_loss:3.4068 train_time:346493ms step_avg:141.89ms step:2453/3200 train_loss:3.5138 train_time:346634ms step_avg:141.89ms step:2454/3200 train_loss:3.3994 train_time:346774ms step_avg:141.89ms step:2455/3200 train_loss:3.5341 train_time:346914ms step_avg:141.89ms step:2456/3200 train_loss:3.4682 train_time:347055ms step_avg:141.89ms step:2457/3200 train_loss:3.3866 train_time:347358ms step_avg:141.95ms step:2458/3200 train_loss:3.3122 train_time:347496ms step_avg:141.95ms step:2459/3200 train_loss:3.4393 train_time:347636ms step_avg:141.95ms step:2460/3200 train_loss:4.0434 train_time:347774ms step_avg:141.95ms step:2461/3200 train_loss:3.5009 train_time:347912ms step_avg:141.95ms step:2462/3200 train_loss:3.3247 train_time:348051ms step_avg:141.95ms step:2463/3200 train_loss:3.5165 train_time:348190ms step_avg:141.94ms step:2464/3200 train_loss:3.4343 train_time:348336ms step_avg:141.95ms step:2465/3200 train_loss:3.6402 train_time:348481ms step_avg:141.95ms step:2466/3200 train_loss:3.8227 train_time:348620ms step_avg:141.95ms step:2467/3200 train_loss:3.5497 train_time:348759ms step_avg:141.95ms step:2468/3200 train_loss:3.4168 train_time:348898ms step_avg:141.94ms step:2469/3200 train_loss:3.5336 train_time:349036ms step_avg:141.94ms step:2470/3200 train_loss:3.5465 train_time:349339ms step_avg:142.01ms step:2471/3200 train_loss:3.3517 train_time:349476ms step_avg:142.01ms step:2472/3200 train_loss:3.4340 train_time:349614ms step_avg:142.00ms step:2473/3200 train_loss:3.4406 train_time:349752ms step_avg:142.00ms step:2474/3200 train_loss:3.5786 train_time:349891ms step_avg:142.00ms step:2475/3200 train_loss:3.7153 train_time:350030ms step_avg:142.00ms step:2476/3200 train_loss:3.2950 train_time:350170ms step_avg:142.00ms step:2477/3200 train_loss:3.5093 train_time:350317ms step_avg:142.00ms step:2478/3200 train_loss:3.4760 train_time:350460ms step_avg:142.00ms step:2479/3200 train_loss:3.3151 train_time:350599ms step_avg:142.00ms step:2480/3200 train_loss:3.3053 train_time:350737ms step_avg:142.00ms step:2481/3200 train_loss:3.4573 train_time:350877ms step_avg:142.00ms step:2482/3200 train_loss:3.4679 train_time:351017ms step_avg:142.00ms step:2483/3200 train_loss:3.4785 train_time:351157ms step_avg:142.00ms step:2484/3200 train_loss:3.4350 train_time:351299ms step_avg:142.00ms step:2485/3200 train_loss:3.4478 train_time:351441ms step_avg:142.00ms step:2486/3200 train_loss:3.3370 train_time:351583ms step_avg:142.00ms step:2487/3200 train_loss:3.5379 train_time:351724ms step_avg:142.00ms step:2488/3200 train_loss:3.4902 train_time:351862ms step_avg:141.99ms step:2489/3200 train_loss:3.3888 train_time:352002ms step_avg:141.99ms step:2490/3200 train_loss:3.4993 train_time:352143ms step_avg:141.99ms step:2491/3200 train_loss:3.5531 train_time:352283ms step_avg:141.99ms step:2492/3200 train_loss:3.6313 train_time:352423ms step_avg:141.99ms step:2493/3200 train_loss:3.4822 train_time:352564ms step_avg:141.99ms step:2494/3200 train_loss:3.4033 train_time:352705ms step_avg:141.99ms step:2495/3200 train_loss:3.5343 train_time:352844ms step_avg:141.99ms step:2496/3200 train_loss:3.4786 train_time:352983ms step_avg:141.99ms step:2497/3200 train_loss:3.3944 train_time:353123ms step_avg:141.99ms step:2498/3200 train_loss:3.4925 train_time:353263ms step_avg:141.99ms step:2499/3200 train_loss:3.5428 train_time:353404ms step_avg:141.99ms step:2500/3200 train_loss:3.5601 train_time:353544ms step_avg:141.99ms step:2500/3200 val_loss:3.4349 train_time:353600ms step_avg:142.01ms step:2501/3200 train_loss:3.5007 train_time:353697ms step_avg:141.99ms step:2502/3200 train_loss:3.4588 train_time:353841ms step_avg:141.99ms step:2503/3200 train_loss:3.4766 train_time:353981ms step_avg:141.99ms step:2504/3200 train_loss:3.3471 train_time:354119ms step_avg:141.99ms step:2505/3200 train_loss:3.5426 train_time:354257ms step_avg:141.99ms step:2506/3200 train_loss:3.4869 train_time:354395ms step_avg:141.99ms step:2507/3200 train_loss:3.4319 train_time:354534ms step_avg:141.98ms step:2508/3200 train_loss:3.4425 train_time:354677ms step_avg:141.98ms step:2509/3200 train_loss:3.3973 train_time:354822ms step_avg:141.99ms step:2510/3200 train_loss:3.5847 train_time:354962ms step_avg:141.98ms step:2511/3200 train_loss:3.3985 train_time:355101ms step_avg:141.98ms step:2512/3200 train_loss:3.3896 train_time:355241ms step_avg:141.98ms step:2513/3200 train_loss:3.4662 train_time:355380ms step_avg:141.98ms step:2514/3200 train_loss:3.4902 train_time:355519ms step_avg:141.98ms step:2515/3200 train_loss:3.3917 train_time:355661ms step_avg:141.98ms step:2516/3200 train_loss:3.4780 train_time:355802ms step_avg:141.98ms step:2517/3200 train_loss:3.4758 train_time:355942ms step_avg:141.98ms step:2518/3200 train_loss:3.3532 train_time:356083ms step_avg:141.98ms step:2519/3200 train_loss:3.3798 train_time:356222ms step_avg:141.98ms step:2520/3200 train_loss:3.5008 train_time:356361ms step_avg:141.98ms step:2521/3200 train_loss:3.4933 train_time:356501ms step_avg:141.98ms step:2522/3200 train_loss:3.3780 train_time:356642ms step_avg:141.98ms step:2523/3200 train_loss:3.3583 train_time:356783ms step_avg:141.97ms step:2524/3200 train_loss:3.4550 train_time:356924ms step_avg:141.97ms step:2525/3200 train_loss:3.3026 train_time:357065ms step_avg:141.97ms step:2526/3200 train_loss:3.5241 train_time:357206ms step_avg:141.97ms step:2527/3200 train_loss:3.4248 train_time:357347ms step_avg:141.97ms step:2528/3200 train_loss:3.4320 train_time:357487ms step_avg:141.97ms step:2529/3200 train_loss:3.4203 train_time:357627ms step_avg:141.97ms step:2530/3200 train_loss:3.4392 train_time:357768ms step_avg:141.97ms step:2531/3200 train_loss:3.4696 train_time:357909ms step_avg:141.97ms step:2532/3200 train_loss:3.2960 train_time:358051ms step_avg:141.97ms step:2533/3200 train_loss:3.4589 train_time:358192ms step_avg:141.97ms step:2534/3200 train_loss:3.3515 train_time:358333ms step_avg:141.97ms step:2535/3200 train_loss:3.3853 train_time:358473ms step_avg:141.97ms step:2536/3200 train_loss:3.4421 train_time:358615ms step_avg:141.97ms step:2537/3200 train_loss:3.4585 train_time:358757ms step_avg:141.97ms step:2538/3200 train_loss:3.2780 train_time:358898ms step_avg:141.97ms step:2539/3200 train_loss:3.5902 train_time:359044ms step_avg:141.97ms step:2540/3200 train_loss:3.2743 train_time:359179ms step_avg:141.97ms step:2541/3200 train_loss:3.4583 train_time:359320ms step_avg:141.97ms step:2542/3200 train_loss:3.2159 train_time:359458ms step_avg:141.97ms step:2543/3200 train_loss:3.6671 train_time:359599ms step_avg:141.97ms step:2544/3200 train_loss:3.4272 train_time:359739ms step_avg:141.96ms step:2545/3200 train_loss:3.5806 train_time:359879ms step_avg:141.96ms step:2546/3200 train_loss:3.4223 train_time:360019ms step_avg:141.96ms step:2547/3200 train_loss:3.4022 train_time:360160ms step_avg:141.96ms step:2548/3200 train_loss:3.4044 train_time:360299ms step_avg:141.96ms step:2549/3200 train_loss:3.5666 train_time:360439ms step_avg:141.96ms step:2550/3200 train_loss:3.4205 train_time:360579ms step_avg:141.96ms step:2551/3200 train_loss:3.4246 train_time:360718ms step_avg:141.96ms step:2552/3200 train_loss:3.4481 train_time:360860ms step_avg:141.96ms step:2553/3200 train_loss:3.4713 train_time:361000ms step_avg:141.96ms step:2554/3200 train_loss:3.3820 train_time:361143ms step_avg:141.96ms step:2555/3200 train_loss:3.4874 train_time:361280ms step_avg:141.96ms step:2556/3200 train_loss:3.5382 train_time:361420ms step_avg:141.96ms step:2557/3200 train_loss:3.5328 train_time:361561ms step_avg:141.96ms step:2558/3200 train_loss:3.3717 train_time:361701ms step_avg:141.95ms step:2559/3200 train_loss:3.3727 train_time:361841ms step_avg:141.95ms step:2560/3200 train_loss:3.3874 train_time:361981ms step_avg:141.95ms step:2561/3200 train_loss:3.5030 train_time:362120ms step_avg:141.95ms step:2562/3200 train_loss:3.5403 train_time:362261ms step_avg:141.95ms step:2563/3200 train_loss:3.4162 train_time:362400ms step_avg:141.95ms step:2564/3200 train_loss:3.4515 train_time:362539ms step_avg:141.95ms step:2565/3200 train_loss:3.3666 train_time:362680ms step_avg:141.95ms step:2566/3200 train_loss:3.3812 train_time:362819ms step_avg:141.95ms step:2567/3200 train_loss:3.3726 train_time:362959ms step_avg:141.95ms step:2568/3200 train_loss:3.4195 train_time:363100ms step_avg:141.95ms step:2569/3200 train_loss:3.5657 train_time:363240ms step_avg:141.95ms step:2570/3200 train_loss:3.4702 train_time:363380ms step_avg:141.95ms step:2571/3200 train_loss:3.5508 train_time:363519ms step_avg:141.94ms step:2572/3200 train_loss:3.3054 train_time:363659ms step_avg:141.94ms step:2573/3200 train_loss:3.4138 train_time:363800ms step_avg:141.94ms step:2574/3200 train_loss:3.0782 train_time:363940ms step_avg:141.94ms step:2575/3200 train_loss:3.3267 train_time:364080ms step_avg:141.94ms step:2576/3200 train_loss:3.2636 train_time:364220ms step_avg:141.94ms step:2577/3200 train_loss:3.3808 train_time:364361ms step_avg:141.94ms step:2578/3200 train_loss:3.4332 train_time:364501ms step_avg:141.94ms step:2579/3200 train_loss:3.3360 train_time:364641ms step_avg:141.94ms step:2580/3200 train_loss:3.3970 train_time:364781ms step_avg:141.94ms step:2581/3200 train_loss:3.3465 train_time:364921ms step_avg:141.94ms step:2582/3200 train_loss:3.4457 train_time:365062ms step_avg:141.94ms step:2583/3200 train_loss:3.3274 train_time:365202ms step_avg:141.94ms step:2584/3200 train_loss:3.5173 train_time:365342ms step_avg:141.94ms step:2585/3200 train_loss:3.4257 train_time:365482ms step_avg:141.93ms step:2586/3200 train_loss:3.4440 train_time:365621ms step_avg:141.93ms step:2587/3200 train_loss:3.5684 train_time:365761ms step_avg:141.93ms step:2588/3200 train_loss:3.4546 train_time:365901ms step_avg:141.93ms step:2589/3200 train_loss:3.3133 train_time:366040ms step_avg:141.93ms step:2590/3200 train_loss:3.4776 train_time:366180ms step_avg:141.93ms step:2591/3200 train_loss:3.3852 train_time:366320ms step_avg:141.93ms step:2592/3200 train_loss:3.5932 train_time:366460ms step_avg:141.93ms step:2593/3200 train_loss:3.4644 train_time:366600ms step_avg:141.93ms step:2594/3200 train_loss:3.2819 train_time:366740ms step_avg:141.93ms step:2595/3200 train_loss:3.3526 train_time:366883ms step_avg:141.93ms step:2596/3200 train_loss:3.8055 train_time:367019ms step_avg:141.93ms step:2597/3200 train_loss:3.4406 train_time:367161ms step_avg:141.93ms step:2598/3200 train_loss:3.4396 train_time:367302ms step_avg:141.92ms step:2599/3200 train_loss:3.2929 train_time:367441ms step_avg:141.92ms step:2600/3200 train_loss:3.5328 train_time:367581ms step_avg:141.92ms step:2601/3200 train_loss:3.7004 train_time:367720ms step_avg:141.92ms step:2602/3200 train_loss:3.2799 train_time:367862ms step_avg:141.92ms step:2603/3200 train_loss:3.4216 train_time:368002ms step_avg:141.92ms step:2604/3200 train_loss:3.2544 train_time:368141ms step_avg:141.92ms step:2605/3200 train_loss:3.5462 train_time:368281ms step_avg:141.92ms step:2606/3200 train_loss:3.4148 train_time:368422ms step_avg:141.92ms step:2607/3200 train_loss:3.3105 train_time:368562ms step_avg:141.92ms step:2608/3200 train_loss:3.2670 train_time:368702ms step_avg:141.92ms step:2609/3200 train_loss:3.3880 train_time:368842ms step_avg:141.92ms step:2610/3200 train_loss:3.5620 train_time:368983ms step_avg:141.92ms step:2611/3200 train_loss:3.4366 train_time:369123ms step_avg:141.92ms step:2612/3200 train_loss:3.2685 train_time:369263ms step_avg:141.92ms step:2613/3200 train_loss:3.3593 train_time:369403ms step_avg:141.91ms step:2614/3200 train_loss:3.4770 train_time:369542ms step_avg:141.91ms step:2615/3200 train_loss:3.4149 train_time:369682ms step_avg:141.91ms step:2616/3200 train_loss:3.4032 train_time:369821ms step_avg:141.91ms step:2617/3200 train_loss:3.4417 train_time:369961ms step_avg:141.91ms step:2618/3200 train_loss:3.4807 train_time:370101ms step_avg:141.91ms step:2619/3200 train_loss:3.3322 train_time:370241ms step_avg:141.91ms step:2620/3200 train_loss:3.5052 train_time:370381ms step_avg:141.91ms step:2621/3200 train_loss:3.4664 train_time:370521ms step_avg:141.91ms step:2622/3200 train_loss:3.5958 train_time:370661ms step_avg:141.91ms step:2623/3200 train_loss:3.5072 train_time:370801ms step_avg:141.91ms step:2624/3200 train_loss:3.4230 train_time:370941ms step_avg:141.91ms step:2625/3200 train_loss:3.3808 train_time:371081ms step_avg:141.90ms step:2625/3200 val_loss:3.4106 train_time:371137ms step_avg:141.93ms step:2626/3200 train_loss:3.4069 train_time:371236ms step_avg:141.91ms step:2627/3200 train_loss:3.4630 train_time:371382ms step_avg:141.91ms step:2628/3200 train_loss:3.2893 train_time:371522ms step_avg:141.91ms step:2629/3200 train_loss:3.5558 train_time:371661ms step_avg:141.91ms step:2630/3200 train_loss:3.4320 train_time:371800ms step_avg:141.91ms step:2631/3200 train_loss:3.4875 train_time:371940ms step_avg:141.91ms step:2632/3200 train_loss:3.7068 train_time:372079ms step_avg:141.91ms step:2633/3200 train_loss:3.4579 train_time:372224ms step_avg:141.91ms step:2634/3200 train_loss:3.3763 train_time:372370ms step_avg:141.91ms step:2635/3200 train_loss:3.3543 train_time:372510ms step_avg:141.91ms step:2636/3200 train_loss:3.3995 train_time:372650ms step_avg:141.91ms step:2637/3200 train_loss:3.1770 train_time:372789ms step_avg:141.91ms step:2638/3200 train_loss:3.4900 train_time:372928ms step_avg:141.91ms step:2639/3200 train_loss:3.4640 train_time:373068ms step_avg:141.91ms step:2640/3200 train_loss:3.3475 train_time:373209ms step_avg:141.90ms step:2641/3200 train_loss:3.4379 train_time:373352ms step_avg:141.90ms step:2642/3200 train_loss:3.4673 train_time:373492ms step_avg:141.90ms step:2643/3200 train_loss:3.2631 train_time:373633ms step_avg:141.90ms step:2644/3200 train_loss:3.3836 train_time:373773ms step_avg:141.90ms step:2645/3200 train_loss:3.4539 train_time:373913ms step_avg:141.90ms step:2646/3200 train_loss:3.4135 train_time:374216ms step_avg:141.96ms step:2647/3200 train_loss:3.3091 train_time:374355ms step_avg:141.96ms step:2648/3200 train_loss:3.5307 train_time:374495ms step_avg:141.96ms step:2649/3200 train_loss:3.7929 train_time:374634ms step_avg:141.96ms step:2650/3200 train_loss:3.4280 train_time:374773ms step_avg:141.96ms step:2651/3200 train_loss:3.3942 train_time:374911ms step_avg:141.96ms step:2652/3200 train_loss:3.5234 train_time:375049ms step_avg:141.96ms step:2653/3200 train_loss:3.3596 train_time:375194ms step_avg:141.96ms step:2654/3200 train_loss:3.3439 train_time:375337ms step_avg:141.96ms step:2655/3200 train_loss:3.4219 train_time:375477ms step_avg:141.96ms step:2656/3200 train_loss:3.3366 train_time:375618ms step_avg:141.96ms step:2657/3200 train_loss:3.3738 train_time:375757ms step_avg:141.96ms step:2658/3200 train_loss:3.3399 train_time:375897ms step_avg:141.95ms step:2659/3200 train_loss:3.4188 train_time:376037ms step_avg:141.95ms step:2660/3200 train_loss:3.5666 train_time:376340ms step_avg:142.01ms step:2661/3200 train_loss:3.3656 train_time:376477ms step_avg:142.01ms step:2662/3200 train_loss:3.5173 train_time:376617ms step_avg:142.01ms step:2663/3200 train_loss:3.3803 train_time:376756ms step_avg:142.01ms step:2664/3200 train_loss:3.3764 train_time:376898ms step_avg:142.01ms step:2665/3200 train_loss:3.3026 train_time:377034ms step_avg:142.01ms step:2666/3200 train_loss:3.3536 train_time:377173ms step_avg:142.01ms step:2667/3200 train_loss:3.3927 train_time:377317ms step_avg:142.01ms step:2668/3200 train_loss:3.4294 train_time:377459ms step_avg:142.01ms step:2669/3200 train_loss:3.3446 train_time:377599ms step_avg:142.01ms step:2670/3200 train_loss:3.4149 train_time:377739ms step_avg:142.01ms step:2671/3200 train_loss:3.2961 train_time:377880ms step_avg:142.01ms step:2672/3200 train_loss:3.3624 train_time:378020ms step_avg:142.01ms step:2673/3200 train_loss:3.3494 train_time:378160ms step_avg:142.01ms step:2674/3200 train_loss:3.4140 train_time:378302ms step_avg:142.01ms step:2675/3200 train_loss:3.4351 train_time:378445ms step_avg:142.01ms step:2676/3200 train_loss:3.4099 train_time:378584ms step_avg:142.00ms step:2677/3200 train_loss:3.3953 train_time:378725ms step_avg:142.00ms step:2678/3200 train_loss:3.4256 train_time:378865ms step_avg:142.00ms step:2679/3200 train_loss:3.4720 train_time:379005ms step_avg:142.00ms step:2680/3200 train_loss:3.3748 train_time:379146ms step_avg:142.00ms step:2681/3200 train_loss:3.3095 train_time:379287ms step_avg:142.00ms step:2682/3200 train_loss:3.3513 train_time:379428ms step_avg:142.00ms step:2683/3200 train_loss:3.8213 train_time:379568ms step_avg:142.00ms step:2684/3200 train_loss:3.4051 train_time:379707ms step_avg:142.00ms step:2685/3200 train_loss:3.4376 train_time:379847ms step_avg:142.00ms step:2686/3200 train_loss:3.4801 train_time:379987ms step_avg:142.00ms step:2687/3200 train_loss:3.4019 train_time:380128ms step_avg:142.00ms step:2688/3200 train_loss:3.4786 train_time:380267ms step_avg:142.00ms step:2689/3200 train_loss:3.4106 train_time:380406ms step_avg:142.00ms step:2690/3200 train_loss:3.3988 train_time:380547ms step_avg:142.00ms step:2691/3200 train_loss:3.4255 train_time:380687ms step_avg:141.99ms step:2692/3200 train_loss:3.4963 train_time:380827ms step_avg:141.99ms step:2693/3200 train_loss:3.2926 train_time:380968ms step_avg:141.99ms step:2694/3200 train_loss:3.6737 train_time:381109ms step_avg:141.99ms step:2695/3200 train_loss:3.4714 train_time:381247ms step_avg:141.99ms step:2696/3200 train_loss:3.2645 train_time:381387ms step_avg:141.99ms step:2697/3200 train_loss:3.4575 train_time:381527ms step_avg:141.99ms step:2698/3200 train_loss:3.4254 train_time:381668ms step_avg:141.99ms step:2699/3200 train_loss:3.3751 train_time:381807ms step_avg:141.99ms step:2700/3200 train_loss:3.4739 train_time:381950ms step_avg:141.99ms step:2701/3200 train_loss:3.4429 train_time:382088ms step_avg:141.99ms step:2702/3200 train_loss:3.3526 train_time:382228ms step_avg:141.99ms step:2703/3200 train_loss:3.3698 train_time:382368ms step_avg:141.99ms step:2704/3200 train_loss:3.3852 train_time:382507ms step_avg:141.98ms step:2705/3200 train_loss:3.3566 train_time:382647ms step_avg:141.98ms step:2706/3200 train_loss:3.5252 train_time:382788ms step_avg:141.98ms step:2707/3200 train_loss:3.4896 train_time:382928ms step_avg:141.98ms step:2708/3200 train_loss:3.3920 train_time:383070ms step_avg:141.98ms step:2709/3200 train_loss:3.3923 train_time:383207ms step_avg:141.98ms step:2710/3200 train_loss:3.4941 train_time:383348ms step_avg:141.98ms step:2711/3200 train_loss:3.3672 train_time:383490ms step_avg:141.98ms step:2712/3200 train_loss:3.4859 train_time:383627ms step_avg:141.98ms step:2713/3200 train_loss:3.2184 train_time:383768ms step_avg:141.98ms step:2714/3200 train_loss:3.4217 train_time:383908ms step_avg:141.98ms step:2715/3200 train_loss:3.3043 train_time:384048ms step_avg:141.98ms step:2716/3200 train_loss:3.3204 train_time:384188ms step_avg:141.98ms step:2717/3200 train_loss:3.5119 train_time:384329ms step_avg:141.98ms step:2718/3200 train_loss:3.4068 train_time:384469ms step_avg:141.98ms step:2719/3200 train_loss:3.6439 train_time:384608ms step_avg:141.97ms step:2720/3200 train_loss:3.3856 train_time:384749ms step_avg:141.97ms step:2721/3200 train_loss:3.3752 train_time:384889ms step_avg:141.97ms step:2722/3200 train_loss:3.6079 train_time:385029ms step_avg:141.97ms step:2723/3200 train_loss:3.3742 train_time:385169ms step_avg:141.97ms step:2724/3200 train_loss:3.5477 train_time:385309ms step_avg:141.97ms step:2725/3200 train_loss:3.4254 train_time:385449ms step_avg:141.97ms step:2726/3200 train_loss:3.3849 train_time:385589ms step_avg:141.97ms step:2727/3200 train_loss:3.3951 train_time:385729ms step_avg:141.97ms step:2728/3200 train_loss:3.7344 train_time:385870ms step_avg:141.97ms step:2729/3200 train_loss:3.4667 train_time:386010ms step_avg:141.97ms step:2730/3200 train_loss:3.3312 train_time:386149ms step_avg:141.97ms step:2731/3200 train_loss:3.4351 train_time:386289ms step_avg:141.97ms step:2732/3200 train_loss:3.3438 train_time:386431ms step_avg:141.97ms step:2733/3200 train_loss:3.2342 train_time:386569ms step_avg:141.96ms step:2734/3200 train_loss:3.3408 train_time:386708ms step_avg:141.96ms step:2735/3200 train_loss:3.4205 train_time:386848ms step_avg:141.96ms step:2736/3200 train_loss:3.3081 train_time:386988ms step_avg:141.96ms step:2737/3200 train_loss:3.7117 train_time:387128ms step_avg:141.96ms step:2738/3200 train_loss:3.4577 train_time:387267ms step_avg:141.96ms step:2739/3200 train_loss:3.6511 train_time:387407ms step_avg:141.96ms step:2740/3200 train_loss:3.4033 train_time:387548ms step_avg:141.96ms step:2741/3200 train_loss:3.4005 train_time:387688ms step_avg:141.96ms step:2742/3200 train_loss:3.3414 train_time:387828ms step_avg:141.96ms step:2743/3200 train_loss:3.4129 train_time:387969ms step_avg:141.96ms step:2744/3200 train_loss:3.4185 train_time:388108ms step_avg:141.96ms step:2745/3200 train_loss:3.5280 train_time:388248ms step_avg:141.96ms step:2746/3200 train_loss:3.2826 train_time:388388ms step_avg:141.95ms step:2747/3200 train_loss:3.3823 train_time:388528ms step_avg:141.95ms step:2748/3200 train_loss:3.4179 train_time:388671ms step_avg:141.95ms step:2749/3200 train_loss:3.5324 train_time:388807ms step_avg:141.95ms step:2750/3200 train_loss:3.3717 train_time:388947ms step_avg:141.95ms step:2750/3200 val_loss:3.3881 train_time:389003ms step_avg:141.97ms step:2751/3200 train_loss:3.4488 train_time:389101ms step_avg:141.96ms step:2752/3200 train_loss:3.5034 train_time:389244ms step_avg:141.96ms step:2753/3200 train_loss:3.4086 train_time:389385ms step_avg:141.96ms step:2754/3200 train_loss:3.3368 train_time:389526ms step_avg:141.96ms step:2755/3200 train_loss:3.3460 train_time:389664ms step_avg:141.95ms step:2756/3200 train_loss:3.4279 train_time:389804ms step_avg:141.95ms step:2757/3200 train_loss:3.3588 train_time:389943ms step_avg:141.95ms step:2758/3200 train_loss:3.2394 train_time:390087ms step_avg:141.95ms step:2759/3200 train_loss:3.6302 train_time:390231ms step_avg:141.95ms step:2760/3200 train_loss:3.4381 train_time:390373ms step_avg:141.95ms step:2761/3200 train_loss:3.3992 train_time:390512ms step_avg:141.95ms step:2762/3200 train_loss:3.3742 train_time:390652ms step_avg:141.95ms step:2763/3200 train_loss:3.2903 train_time:390791ms step_avg:141.95ms step:2764/3200 train_loss:3.4541 train_time:390931ms step_avg:141.95ms step:2765/3200 train_loss:3.3803 train_time:391075ms step_avg:141.95ms step:2766/3200 train_loss:3.2764 train_time:391214ms step_avg:141.95ms step:2767/3200 train_loss:3.3644 train_time:391356ms step_avg:141.95ms step:2768/3200 train_loss:3.4434 train_time:391496ms step_avg:141.95ms step:2769/3200 train_loss:3.3229 train_time:391636ms step_avg:141.95ms step:2770/3200 train_loss:3.3975 train_time:391775ms step_avg:141.95ms step:2771/3200 train_loss:3.3831 train_time:391914ms step_avg:141.95ms step:2772/3200 train_loss:3.8094 train_time:392055ms step_avg:141.95ms step:2773/3200 train_loss:3.2864 train_time:392194ms step_avg:141.95ms step:2774/3200 train_loss:3.4183 train_time:392334ms step_avg:141.94ms step:2775/3200 train_loss:3.4806 train_time:392475ms step_avg:141.94ms step:2776/3200 train_loss:3.4466 train_time:392616ms step_avg:141.94ms step:2777/3200 train_loss:3.5160 train_time:392755ms step_avg:141.94ms step:2778/3200 train_loss:3.5232 train_time:392894ms step_avg:141.94ms step:2779/3200 train_loss:3.3951 train_time:393035ms step_avg:141.94ms step:2780/3200 train_loss:3.2653 train_time:393175ms step_avg:141.94ms step:2781/3200 train_loss:3.4062 train_time:393316ms step_avg:141.94ms step:2782/3200 train_loss:3.4322 train_time:393456ms step_avg:141.94ms step:2783/3200 train_loss:3.3016 train_time:393597ms step_avg:141.94ms step:2784/3200 train_loss:3.4008 train_time:393736ms step_avg:141.94ms step:2785/3200 train_loss:3.4589 train_time:393876ms step_avg:141.94ms step:2786/3200 train_loss:3.3400 train_time:394015ms step_avg:141.94ms step:2787/3200 train_loss:3.4557 train_time:394155ms step_avg:141.94ms step:2788/3200 train_loss:3.4192 train_time:394295ms step_avg:141.93ms step:2789/3200 train_loss:3.3505 train_time:394436ms step_avg:141.93ms step:2790/3200 train_loss:3.4409 train_time:394577ms step_avg:141.93ms step:2791/3200 train_loss:3.3656 train_time:394716ms step_avg:141.93ms step:2792/3200 train_loss:3.2606 train_time:394857ms step_avg:141.93ms step:2793/3200 train_loss:3.3650 train_time:394997ms step_avg:141.93ms step:2794/3200 train_loss:3.4062 train_time:395136ms step_avg:141.93ms step:2795/3200 train_loss:3.3246 train_time:395278ms step_avg:141.93ms step:2796/3200 train_loss:3.3675 train_time:395418ms step_avg:141.93ms step:2797/3200 train_loss:3.2797 train_time:395558ms step_avg:141.93ms step:2798/3200 train_loss:3.3849 train_time:395698ms step_avg:141.93ms step:2799/3200 train_loss:3.3433 train_time:395838ms step_avg:141.93ms step:2800/3200 train_loss:3.5106 train_time:395978ms step_avg:141.93ms step:2801/3200 train_loss:3.4617 train_time:396117ms step_avg:141.93ms step:2802/3200 train_loss:3.4318 train_time:396259ms step_avg:141.93ms step:2803/3200 train_loss:3.3781 train_time:396400ms step_avg:141.93ms step:2804/3200 train_loss:3.5504 train_time:396539ms step_avg:141.93ms step:2805/3200 train_loss:3.5185 train_time:396680ms step_avg:141.92ms step:2806/3200 train_loss:3.2482 train_time:396820ms step_avg:141.92ms step:2807/3200 train_loss:3.6440 train_time:396963ms step_avg:141.92ms step:2808/3200 train_loss:3.3888 train_time:397101ms step_avg:141.92ms step:2809/3200 train_loss:3.3190 train_time:397241ms step_avg:141.92ms step:2810/3200 train_loss:3.3341 train_time:397382ms step_avg:141.92ms step:2811/3200 train_loss:3.5030 train_time:397523ms step_avg:141.92ms step:2812/3200 train_loss:3.4895 train_time:397664ms step_avg:141.92ms step:2813/3200 train_loss:3.2376 train_time:397806ms step_avg:141.92ms step:2814/3200 train_loss:3.4636 train_time:397946ms step_avg:141.92ms step:2815/3200 train_loss:3.5309 train_time:398087ms step_avg:141.92ms step:2816/3200 train_loss:3.3451 train_time:398229ms step_avg:141.92ms step:2817/3200 train_loss:2.9605 train_time:398369ms step_avg:141.92ms step:2818/3200 train_loss:3.3629 train_time:398511ms step_avg:141.92ms step:2819/3200 train_loss:3.3319 train_time:398652ms step_avg:141.92ms step:2820/3200 train_loss:3.5291 train_time:398792ms step_avg:141.92ms step:2821/3200 train_loss:3.3807 train_time:398931ms step_avg:141.92ms step:2822/3200 train_loss:3.4487 train_time:399072ms step_avg:141.92ms step:2823/3200 train_loss:3.3839 train_time:399212ms step_avg:141.92ms step:2824/3200 train_loss:3.3531 train_time:399353ms step_avg:141.92ms step:2825/3200 train_loss:3.2552 train_time:399492ms step_avg:141.92ms step:2826/3200 train_loss:3.5075 train_time:399634ms step_avg:141.92ms step:2827/3200 train_loss:3.4075 train_time:399774ms step_avg:141.91ms step:2828/3200 train_loss:3.2977 train_time:399914ms step_avg:141.91ms step:2829/3200 train_loss:3.4231 train_time:400060ms step_avg:141.92ms step:2830/3200 train_loss:3.4174 train_time:400193ms step_avg:141.91ms step:2831/3200 train_loss:3.3617 train_time:400336ms step_avg:141.91ms step:2832/3200 train_loss:3.4988 train_time:400474ms step_avg:141.91ms step:2833/3200 train_loss:3.4218 train_time:400614ms step_avg:141.91ms step:2834/3200 train_loss:3.4007 train_time:400757ms step_avg:141.91ms step:2835/3200 train_loss:3.2167 train_time:401126ms step_avg:141.99ms step:2836/3200 train_loss:3.4349 train_time:401263ms step_avg:141.99ms step:2837/3200 train_loss:3.3706 train_time:401400ms step_avg:141.99ms step:2838/3200 train_loss:3.6802 train_time:401539ms step_avg:141.99ms step:2839/3200 train_loss:3.3238 train_time:401678ms step_avg:141.99ms step:2840/3200 train_loss:3.3314 train_time:401817ms step_avg:141.98ms step:2841/3200 train_loss:3.3886 train_time:401954ms step_avg:141.98ms step:2842/3200 train_loss:3.3165 train_time:402099ms step_avg:141.98ms step:2843/3200 train_loss:3.3217 train_time:402243ms step_avg:141.98ms step:2844/3200 train_loss:3.4884 train_time:402386ms step_avg:141.99ms step:2845/3200 train_loss:3.3784 train_time:402526ms step_avg:141.98ms step:2846/3200 train_loss:3.4077 train_time:402665ms step_avg:141.98ms step:2847/3200 train_loss:3.3661 train_time:402807ms step_avg:141.98ms step:2848/3200 train_loss:3.6377 train_time:402945ms step_avg:141.98ms step:2849/3200 train_loss:3.2955 train_time:403088ms step_avg:141.98ms step:2850/3200 train_loss:3.3311 train_time:403418ms step_avg:142.05ms step:2851/3200 train_loss:3.4301 train_time:403557ms step_avg:142.05ms step:2852/3200 train_loss:3.4046 train_time:403696ms step_avg:142.05ms step:2853/3200 train_loss:3.3669 train_time:403834ms step_avg:142.05ms step:2854/3200 train_loss:3.4424 train_time:403973ms step_avg:142.04ms step:2855/3200 train_loss:3.2606 train_time:404110ms step_avg:142.04ms step:2856/3200 train_loss:3.2820 train_time:404253ms step_avg:142.04ms step:2857/3200 train_loss:3.3824 train_time:404395ms step_avg:142.04ms step:2858/3200 train_loss:3.3808 train_time:404538ms step_avg:142.04ms step:2859/3200 train_loss:3.2732 train_time:404680ms step_avg:142.04ms step:2860/3200 train_loss:3.3618 train_time:404817ms step_avg:142.04ms step:2861/3200 train_loss:3.3256 train_time:404957ms step_avg:142.04ms step:2862/3200 train_loss:3.3639 train_time:405096ms step_avg:142.04ms step:2863/3200 train_loss:3.4117 train_time:405237ms step_avg:142.04ms step:2864/3200 train_loss:3.6765 train_time:405377ms step_avg:142.04ms step:2865/3200 train_loss:3.4776 train_time:405519ms step_avg:142.04ms step:2866/3200 train_loss:3.3718 train_time:405663ms step_avg:142.04ms step:2867/3200 train_loss:3.2516 train_time:405805ms step_avg:142.04ms step:2868/3200 train_loss:3.4599 train_time:405943ms step_avg:142.04ms step:2869/3200 train_loss:3.4080 train_time:406084ms step_avg:142.04ms step:2870/3200 train_loss:3.3700 train_time:406223ms step_avg:142.04ms step:2871/3200 train_loss:3.5093 train_time:406368ms step_avg:142.04ms step:2872/3200 train_loss:3.2786 train_time:406510ms step_avg:142.04ms step:2873/3200 train_loss:3.3493 train_time:406652ms step_avg:142.04ms step:2874/3200 train_loss:3.2159 train_time:406791ms step_avg:142.04ms step:2875/3200 train_loss:3.3676 train_time:406932ms step_avg:142.04ms step:2875/3200 val_loss:3.3671 train_time:406985ms step_avg:142.05ms step:2876/3200 train_loss:3.2913 train_time:407077ms step_avg:142.04ms step:2877/3200 train_loss:3.2724 train_time:407223ms step_avg:142.04ms step:2878/3200 train_loss:3.3586 train_time:407363ms step_avg:142.04ms step:2879/3200 train_loss:3.4779 train_time:407501ms step_avg:142.04ms step:2880/3200 train_loss:3.4234 train_time:407639ms step_avg:142.03ms step:2881/3200 train_loss:3.3720 train_time:407778ms step_avg:142.03ms step:2882/3200 train_loss:3.3607 train_time:407918ms step_avg:142.03ms step:2883/3200 train_loss:3.4778 train_time:408063ms step_avg:142.03ms step:2884/3200 train_loss:3.2601 train_time:408207ms step_avg:142.03ms step:2885/3200 train_loss:3.2864 train_time:408347ms step_avg:142.03ms step:2886/3200 train_loss:3.3277 train_time:408486ms step_avg:142.03ms step:2887/3200 train_loss:3.3267 train_time:408626ms step_avg:142.03ms step:2888/3200 train_loss:3.3323 train_time:408765ms step_avg:142.03ms step:2889/3200 train_loss:3.3642 train_time:408905ms step_avg:142.03ms step:2890/3200 train_loss:3.5424 train_time:409046ms step_avg:142.03ms step:2891/3200 train_loss:3.3864 train_time:409187ms step_avg:142.03ms step:2892/3200 train_loss:3.2182 train_time:409328ms step_avg:142.03ms step:2893/3200 train_loss:3.1543 train_time:409469ms step_avg:142.03ms step:2894/3200 train_loss:3.3005 train_time:409608ms step_avg:142.03ms step:2895/3200 train_loss:3.1790 train_time:409748ms step_avg:142.03ms step:2896/3200 train_loss:3.3583 train_time:409888ms step_avg:142.03ms step:2897/3200 train_loss:3.4869 train_time:410028ms step_avg:142.03ms step:2898/3200 train_loss:3.3095 train_time:410168ms step_avg:142.02ms step:2899/3200 train_loss:3.4041 train_time:410308ms step_avg:142.02ms step:2900/3200 train_loss:3.2894 train_time:410448ms step_avg:142.02ms step:2901/3200 train_loss:3.4770 train_time:410588ms step_avg:142.02ms step:2902/3200 train_loss:3.4638 train_time:410727ms step_avg:142.02ms step:2903/3200 train_loss:3.4961 train_time:410867ms step_avg:142.02ms step:2904/3200 train_loss:3.2244 train_time:411007ms step_avg:142.02ms step:2905/3200 train_loss:3.3651 train_time:411149ms step_avg:142.02ms step:2906/3200 train_loss:3.3405 train_time:411288ms step_avg:142.02ms step:2907/3200 train_loss:3.4114 train_time:411429ms step_avg:142.02ms step:2908/3200 train_loss:3.3540 train_time:411569ms step_avg:142.02ms step:2909/3200 train_loss:3.3206 train_time:411709ms step_avg:142.02ms step:2910/3200 train_loss:3.6545 train_time:411849ms step_avg:142.02ms step:2911/3200 train_loss:3.3667 train_time:411988ms step_avg:142.02ms step:2912/3200 train_loss:3.2756 train_time:412129ms step_avg:142.02ms step:2913/3200 train_loss:3.2610 train_time:412268ms step_avg:142.01ms step:2914/3200 train_loss:3.7437 train_time:412409ms step_avg:142.01ms step:2915/3200 train_loss:3.3352 train_time:412549ms step_avg:142.01ms step:2916/3200 train_loss:3.2854 train_time:412689ms step_avg:142.01ms step:2917/3200 train_loss:3.2728 train_time:412829ms step_avg:142.01ms step:2918/3200 train_loss:3.5503 train_time:412968ms step_avg:142.01ms step:2919/3200 train_loss:3.0560 train_time:413109ms step_avg:142.01ms step:2920/3200 train_loss:3.2542 train_time:413249ms step_avg:142.01ms step:2921/3200 train_loss:3.2720 train_time:413389ms step_avg:142.01ms step:2922/3200 train_loss:3.3762 train_time:413529ms step_avg:142.01ms step:2923/3200 train_loss:3.4132 train_time:413668ms step_avg:142.01ms step:2924/3200 train_loss:3.4469 train_time:413808ms step_avg:142.01ms step:2925/3200 train_loss:3.4583 train_time:413948ms step_avg:142.01ms step:2926/3200 train_loss:3.3395 train_time:414088ms step_avg:142.01ms step:2927/3200 train_loss:3.3489 train_time:414227ms step_avg:142.00ms step:2928/3200 train_loss:3.3361 train_time:414368ms step_avg:142.00ms step:2929/3200 train_loss:3.3414 train_time:414509ms step_avg:142.00ms step:2930/3200 train_loss:3.2990 train_time:414648ms step_avg:142.00ms step:2931/3200 train_loss:3.3286 train_time:414788ms step_avg:142.00ms step:2932/3200 train_loss:3.4623 train_time:414929ms step_avg:142.00ms step:2933/3200 train_loss:3.4974 train_time:415069ms step_avg:142.00ms step:2934/3200 train_loss:3.4717 train_time:415209ms step_avg:142.00ms step:2935/3200 train_loss:3.3116 train_time:415349ms step_avg:142.00ms step:2936/3200 train_loss:3.3662 train_time:415489ms step_avg:142.00ms step:2937/3200 train_loss:3.3116 train_time:415629ms step_avg:142.00ms step:2938/3200 train_loss:3.3346 train_time:415771ms step_avg:142.00ms step:2939/3200 train_loss:3.3533 train_time:415909ms step_avg:142.00ms step:2940/3200 train_loss:3.3994 train_time:416050ms step_avg:142.00ms step:2941/3200 train_loss:3.4420 train_time:416190ms step_avg:142.00ms step:2942/3200 train_loss:3.4311 train_time:416331ms step_avg:142.00ms step:2943/3200 train_loss:3.3619 train_time:416474ms step_avg:142.00ms step:2944/3200 train_loss:3.2407 train_time:416611ms step_avg:141.99ms step:2945/3200 train_loss:3.1787 train_time:416751ms step_avg:141.99ms step:2946/3200 train_loss:3.3865 train_time:416892ms step_avg:141.99ms step:2947/3200 train_loss:3.4491 train_time:417033ms step_avg:141.99ms step:2948/3200 train_loss:3.3810 train_time:417174ms step_avg:141.99ms step:2949/3200 train_loss:3.5680 train_time:417315ms step_avg:141.99ms step:2950/3200 train_loss:3.3842 train_time:417457ms step_avg:141.99ms step:2951/3200 train_loss:3.3889 train_time:417597ms step_avg:141.99ms step:2952/3200 train_loss:3.7849 train_time:417740ms step_avg:141.99ms step:2953/3200 train_loss:3.4679 train_time:417879ms step_avg:141.99ms step:2954/3200 train_loss:3.4085 train_time:418019ms step_avg:141.99ms step:2955/3200 train_loss:3.4204 train_time:418161ms step_avg:141.99ms step:2956/3200 train_loss:3.3539 train_time:418301ms step_avg:141.99ms step:2957/3200 train_loss:3.3773 train_time:418440ms step_avg:141.99ms step:2958/3200 train_loss:3.2493 train_time:418580ms step_avg:141.99ms step:2959/3200 train_loss:3.3346 train_time:418720ms step_avg:141.99ms step:2960/3200 train_loss:3.4764 train_time:418861ms step_avg:141.99ms step:2961/3200 train_loss:3.2837 train_time:419000ms step_avg:141.99ms step:2962/3200 train_loss:3.4102 train_time:419144ms step_avg:141.99ms step:2963/3200 train_loss:3.2735 train_time:419282ms step_avg:141.98ms step:2964/3200 train_loss:3.3354 train_time:419422ms step_avg:141.98ms step:2965/3200 train_loss:3.3132 train_time:419561ms step_avg:141.98ms step:2966/3200 train_loss:3.4215 train_time:419701ms step_avg:141.98ms step:2967/3200 train_loss:3.3061 train_time:419841ms step_avg:141.98ms step:2968/3200 train_loss:3.5410 train_time:419982ms step_avg:141.98ms step:2969/3200 train_loss:3.3955 train_time:420123ms step_avg:141.98ms step:2970/3200 train_loss:3.4077 train_time:420263ms step_avg:141.98ms step:2971/3200 train_loss:3.3894 train_time:420403ms step_avg:141.98ms step:2972/3200 train_loss:3.4684 train_time:420542ms step_avg:141.98ms step:2973/3200 train_loss:3.2978 train_time:420683ms step_avg:141.98ms step:2974/3200 train_loss:3.3027 train_time:420822ms step_avg:141.98ms step:2975/3200 train_loss:3.2240 train_time:420962ms step_avg:141.98ms step:2976/3200 train_loss:3.2989 train_time:421102ms step_avg:141.98ms step:2977/3200 train_loss:3.2850 train_time:421242ms step_avg:141.98ms step:2978/3200 train_loss:3.3068 train_time:421386ms step_avg:141.98ms step:2979/3200 train_loss:3.5901 train_time:421524ms step_avg:141.97ms step:2980/3200 train_loss:3.3944 train_time:421664ms step_avg:141.97ms step:2981/3200 train_loss:3.4374 train_time:421804ms step_avg:141.97ms step:2982/3200 train_loss:3.4522 train_time:421943ms step_avg:141.97ms step:2983/3200 train_loss:3.5281 train_time:422083ms step_avg:141.97ms step:2984/3200 train_loss:3.3343 train_time:422224ms step_avg:141.97ms step:2985/3200 train_loss:3.4311 train_time:422364ms step_avg:141.97ms step:2986/3200 train_loss:3.4324 train_time:422504ms step_avg:141.97ms step:2987/3200 train_loss:3.3785 train_time:422645ms step_avg:141.97ms step:2988/3200 train_loss:3.4965 train_time:422785ms step_avg:141.97ms step:2989/3200 train_loss:3.0956 train_time:422926ms step_avg:141.97ms step:2990/3200 train_loss:3.4438 train_time:423065ms step_avg:141.97ms step:2991/3200 train_loss:3.4007 train_time:423205ms step_avg:141.97ms step:2992/3200 train_loss:3.3591 train_time:423346ms step_avg:141.97ms step:2993/3200 train_loss:3.2965 train_time:423486ms step_avg:141.97ms step:2994/3200 train_loss:3.4294 train_time:423626ms step_avg:141.97ms step:2995/3200 train_loss:3.2524 train_time:423766ms step_avg:141.97ms step:2996/3200 train_loss:3.2733 train_time:423906ms step_avg:141.96ms step:2997/3200 train_loss:3.3475 train_time:424046ms step_avg:141.96ms step:2998/3200 train_loss:3.2889 train_time:424185ms step_avg:141.96ms step:2999/3200 train_loss:3.4121 train_time:424327ms step_avg:141.96ms step:3000/3200 train_loss:3.3148 train_time:424471ms step_avg:141.96ms step:3000/3200 val_loss:3.3482 train_time:424521ms step_avg:141.98ms step:3001/3200 train_loss:3.3060 train_time:424620ms step_avg:141.97ms step:3002/3200 train_loss:3.2519 train_time:424764ms step_avg:141.97ms step:3003/3200 train_loss:3.2895 train_time:424904ms step_avg:141.97ms step:3004/3200 train_loss:3.4152 train_time:425042ms step_avg:141.96ms step:3005/3200 train_loss:3.7563 train_time:425181ms step_avg:141.96ms step:3006/3200 train_loss:3.3229 train_time:425321ms step_avg:141.96ms step:3007/3200 train_loss:3.4026 train_time:425460ms step_avg:141.96ms step:3008/3200 train_loss:3.2060 train_time:425604ms step_avg:141.96ms step:3009/3200 train_loss:3.4267 train_time:425749ms step_avg:141.96ms step:3010/3200 train_loss:3.3219 train_time:425891ms step_avg:141.96ms step:3011/3200 train_loss:3.3841 train_time:426028ms step_avg:141.96ms step:3012/3200 train_loss:3.3861 train_time:426168ms step_avg:141.96ms step:3013/3200 train_loss:3.2668 train_time:426308ms step_avg:141.96ms step:3014/3200 train_loss:3.4641 train_time:426448ms step_avg:141.96ms step:3015/3200 train_loss:3.4269 train_time:426589ms step_avg:141.96ms step:3016/3200 train_loss:3.2918 train_time:426730ms step_avg:141.96ms step:3017/3200 train_loss:3.3349 train_time:426872ms step_avg:141.96ms step:3018/3200 train_loss:3.3792 train_time:427011ms step_avg:141.96ms step:3019/3200 train_loss:3.4206 train_time:427151ms step_avg:141.96ms step:3020/3200 train_loss:3.1980 train_time:427290ms step_avg:141.96ms step:3021/3200 train_loss:3.5000 train_time:427430ms step_avg:141.96ms step:3022/3200 train_loss:3.3328 train_time:427571ms step_avg:141.96ms step:3023/3200 train_loss:3.2489 train_time:427712ms step_avg:141.96ms step:3024/3200 train_loss:3.3470 train_time:428004ms step_avg:142.01ms step:3025/3200 train_loss:3.3273 train_time:428140ms step_avg:142.00ms step:3026/3200 train_loss:3.3867 train_time:428279ms step_avg:142.00ms step:3027/3200 train_loss:3.4069 train_time:428419ms step_avg:142.00ms step:3028/3200 train_loss:3.3103 train_time:428559ms step_avg:142.00ms step:3029/3200 train_loss:3.1134 train_time:428698ms step_avg:142.00ms step:3030/3200 train_loss:3.4554 train_time:428837ms step_avg:142.00ms step:3031/3200 train_loss:3.2269 train_time:428982ms step_avg:142.00ms step:3032/3200 train_loss:3.2113 train_time:429125ms step_avg:142.00ms step:3033/3200 train_loss:3.5512 train_time:429265ms step_avg:142.00ms step:3034/3200 train_loss:3.5487 train_time:429405ms step_avg:142.00ms step:3035/3200 train_loss:3.3166 train_time:429546ms step_avg:142.00ms step:3036/3200 train_loss:3.3966 train_time:429686ms step_avg:142.00ms step:3037/3200 train_loss:3.3466 train_time:429825ms step_avg:142.00ms step:3038/3200 train_loss:3.2478 train_time:429969ms step_avg:142.00ms step:3039/3200 train_loss:3.2941 train_time:430110ms step_avg:142.00ms step:3040/3200 train_loss:3.3923 train_time:430414ms step_avg:142.05ms step:3041/3200 train_loss:3.3843 train_time:430549ms step_avg:142.05ms step:3042/3200 train_loss:3.1762 train_time:430688ms step_avg:142.05ms step:3043/3200 train_loss:3.3343 train_time:430826ms step_avg:142.05ms step:3044/3200 train_loss:3.3626 train_time:430965ms step_avg:142.05ms step:3045/3200 train_loss:3.3696 train_time:431104ms step_avg:142.04ms step:3046/3200 train_loss:3.4517 train_time:431243ms step_avg:142.04ms step:3047/3200 train_loss:3.2704 train_time:431389ms step_avg:142.04ms step:3048/3200 train_loss:3.3936 train_time:431533ms step_avg:142.05ms step:3049/3200 train_loss:3.3379 train_time:431672ms step_avg:142.04ms step:3050/3200 train_loss:3.2695 train_time:431811ms step_avg:142.04ms step:3051/3200 train_loss:3.3946 train_time:431950ms step_avg:142.04ms step:3052/3200 train_loss:3.2376 train_time:432089ms step_avg:142.04ms step:3053/3200 train_loss:3.4766 train_time:432229ms step_avg:142.04ms step:3054/3200 train_loss:3.4313 train_time:432372ms step_avg:142.04ms step:3055/3200 train_loss:3.4039 train_time:432513ms step_avg:142.04ms step:3056/3200 train_loss:3.4114 train_time:432652ms step_avg:142.04ms step:3057/3200 train_loss:3.2894 train_time:432792ms step_avg:142.04ms step:3058/3200 train_loss:3.3077 train_time:432930ms step_avg:142.04ms step:3059/3200 train_loss:3.3888 train_time:433070ms step_avg:142.04ms step:3060/3200 train_loss:3.2999 train_time:433210ms step_avg:142.04ms step:3061/3200 train_loss:3.3582 train_time:433354ms step_avg:142.04ms step:3062/3200 train_loss:3.3602 train_time:433491ms step_avg:142.04ms step:3063/3200 train_loss:3.3002 train_time:433632ms step_avg:142.03ms step:3064/3200 train_loss:3.2700 train_time:433772ms step_avg:142.03ms step:3065/3200 train_loss:3.2884 train_time:433911ms step_avg:142.03ms step:3066/3200 train_loss:3.2675 train_time:434051ms step_avg:142.03ms step:3067/3200 train_loss:3.2620 train_time:434191ms step_avg:142.03ms step:3068/3200 train_loss:3.2178 train_time:434330ms step_avg:142.03ms step:3069/3200 train_loss:3.2572 train_time:434472ms step_avg:142.03ms step:3070/3200 train_loss:3.2495 train_time:434612ms step_avg:142.03ms step:3071/3200 train_loss:3.4401 train_time:434752ms step_avg:142.03ms step:3072/3200 train_loss:3.3653 train_time:434892ms step_avg:142.03ms step:3073/3200 train_loss:3.4064 train_time:435032ms step_avg:142.03ms step:3074/3200 train_loss:3.3964 train_time:435173ms step_avg:142.03ms step:3075/3200 train_loss:3.3386 train_time:435313ms step_avg:142.03ms step:3076/3200 train_loss:3.3957 train_time:435454ms step_avg:142.03ms step:3077/3200 train_loss:3.4519 train_time:435595ms step_avg:142.03ms step:3078/3200 train_loss:3.2460 train_time:435735ms step_avg:142.03ms step:3079/3200 train_loss:3.7915 train_time:435875ms step_avg:142.03ms step:3080/3200 train_loss:3.3352 train_time:436015ms step_avg:142.02ms step:3081/3200 train_loss:3.3015 train_time:436155ms step_avg:142.02ms step:3082/3200 train_loss:3.4496 train_time:436296ms step_avg:142.02ms step:3083/3200 train_loss:3.2598 train_time:436437ms step_avg:142.02ms step:3084/3200 train_loss:3.2891 train_time:436578ms step_avg:142.02ms step:3085/3200 train_loss:3.3422 train_time:436720ms step_avg:142.02ms step:3086/3200 train_loss:3.4351 train_time:436865ms step_avg:142.02ms step:3087/3200 train_loss:3.3468 train_time:437002ms step_avg:142.02ms step:3088/3200 train_loss:3.2584 train_time:437143ms step_avg:142.02ms step:3089/3200 train_loss:3.4098 train_time:437282ms step_avg:142.02ms step:3090/3200 train_loss:3.2643 train_time:437424ms step_avg:142.02ms step:3091/3200 train_loss:3.5310 train_time:437566ms step_avg:142.02ms step:3092/3200 train_loss:4.1032 train_time:437706ms step_avg:142.02ms step:3093/3200 train_loss:3.3716 train_time:437847ms step_avg:142.02ms step:3094/3200 train_loss:3.2605 train_time:437987ms step_avg:142.02ms step:3095/3200 train_loss:3.2107 train_time:438126ms step_avg:142.02ms step:3096/3200 train_loss:3.3843 train_time:438266ms step_avg:142.02ms step:3097/3200 train_loss:3.5122 train_time:438406ms step_avg:142.02ms step:3098/3200 train_loss:3.2840 train_time:438548ms step_avg:142.02ms step:3099/3200 train_loss:3.3220 train_time:438687ms step_avg:142.02ms step:3100/3200 train_loss:3.4895 train_time:438831ms step_avg:142.02ms step:3101/3200 train_loss:3.4020 train_time:438969ms step_avg:142.02ms step:3102/3200 train_loss:3.3920 train_time:439109ms step_avg:142.01ms step:3103/3200 train_loss:3.3041 train_time:439249ms step_avg:142.01ms step:3104/3200 train_loss:3.5615 train_time:439390ms step_avg:142.01ms step:3105/3200 train_loss:3.3798 train_time:439529ms step_avg:142.01ms step:3106/3200 train_loss:3.2383 train_time:439670ms step_avg:142.01ms step:3107/3200 train_loss:3.2625 train_time:439811ms step_avg:142.01ms step:3108/3200 train_loss:3.2240 train_time:439951ms step_avg:142.01ms step:3109/3200 train_loss:3.4437 train_time:440091ms step_avg:142.01ms step:3110/3200 train_loss:3.3325 train_time:440230ms step_avg:142.01ms step:3111/3200 train_loss:3.3640 train_time:440370ms step_avg:142.01ms step:3112/3200 train_loss:3.3375 train_time:440511ms step_avg:142.01ms step:3113/3200 train_loss:3.3933 train_time:440650ms step_avg:142.01ms step:3114/3200 train_loss:3.3511 train_time:440791ms step_avg:142.01ms step:3115/3200 train_loss:3.3497 train_time:440931ms step_avg:142.01ms step:3116/3200 train_loss:3.3885 train_time:441071ms step_avg:142.01ms step:3117/3200 train_loss:3.2402 train_time:441210ms step_avg:142.01ms step:3118/3200 train_loss:3.2610 train_time:441350ms step_avg:142.00ms step:3119/3200 train_loss:3.4399 train_time:441490ms step_avg:142.00ms step:3120/3200 train_loss:3.4227 train_time:441629ms step_avg:142.00ms step:3121/3200 train_loss:3.2125 train_time:441771ms step_avg:142.00ms step:3122/3200 train_loss:3.4067 train_time:441911ms step_avg:142.00ms step:3123/3200 train_loss:3.4659 train_time:442051ms step_avg:142.00ms step:3124/3200 train_loss:3.4295 train_time:442191ms step_avg:142.00ms step:3125/3200 train_loss:3.2263 train_time:442332ms step_avg:142.00ms step:3125/3200 val_loss:3.3321 train_time:442387ms step_avg:142.02ms step:3126/3200 train_loss:3.3095 train_time:442484ms step_avg:142.00ms step:3127/3200 train_loss:3.3385 train_time:442630ms step_avg:142.01ms step:3128/3200 train_loss:3.4305 train_time:442769ms step_avg:142.00ms step:3129/3200 train_loss:3.5006 train_time:442908ms step_avg:142.00ms step:3130/3200 train_loss:3.2002 train_time:443047ms step_avg:142.00ms step:3131/3200 train_loss:3.3674 train_time:443187ms step_avg:142.00ms step:3132/3200 train_loss:3.3729 train_time:443325ms step_avg:142.00ms step:3133/3200 train_loss:3.3918 train_time:443467ms step_avg:142.00ms step:3134/3200 train_loss:3.2864 train_time:443611ms step_avg:142.00ms step:3135/3200 train_loss:3.4087 train_time:443750ms step_avg:142.00ms step:3136/3200 train_loss:3.3140 train_time:443891ms step_avg:142.00ms step:3137/3200 train_loss:3.3901 train_time:444031ms step_avg:142.00ms step:3138/3200 train_loss:3.5728 train_time:444170ms step_avg:142.00ms step:3139/3200 train_loss:3.5524 train_time:444309ms step_avg:142.00ms step:3140/3200 train_loss:3.3184 train_time:444449ms step_avg:142.00ms step:3141/3200 train_loss:3.3372 train_time:444591ms step_avg:142.00ms step:3142/3200 train_loss:3.2521 train_time:444732ms step_avg:142.00ms step:3143/3200 train_loss:3.3537 train_time:444874ms step_avg:142.00ms step:3144/3200 train_loss:3.1535 train_time:445014ms step_avg:142.00ms step:3145/3200 train_loss:3.3881 train_time:445154ms step_avg:141.99ms step:3146/3200 train_loss:3.2977 train_time:445295ms step_avg:141.99ms step:3147/3200 train_loss:3.3231 train_time:445437ms step_avg:141.99ms step:3148/3200 train_loss:3.4922 train_time:445577ms step_avg:141.99ms step:3149/3200 train_loss:3.5740 train_time:445717ms step_avg:141.99ms step:3150/3200 train_loss:3.4551 train_time:445859ms step_avg:141.99ms step:3151/3200 train_loss:3.2589 train_time:445999ms step_avg:141.99ms step:3152/3200 train_loss:3.3089 train_time:446140ms step_avg:141.99ms step:3153/3200 train_loss:3.2795 train_time:446280ms step_avg:141.99ms step:3154/3200 train_loss:3.4033 train_time:446420ms step_avg:141.99ms step:3155/3200 train_loss:3.2223 train_time:446560ms step_avg:141.99ms step:3156/3200 train_loss:3.3528 train_time:446701ms step_avg:141.99ms step:3157/3200 train_loss:3.2990 train_time:446841ms step_avg:141.99ms step:3158/3200 train_loss:3.4266 train_time:446981ms step_avg:141.99ms step:3159/3200 train_loss:3.4887 train_time:447120ms step_avg:141.99ms step:3160/3200 train_loss:3.3355 train_time:447260ms step_avg:141.99ms step:3161/3200 train_loss:3.3900 train_time:447400ms step_avg:141.99ms step:3162/3200 train_loss:3.4774 train_time:447541ms step_avg:141.99ms step:3163/3200 train_loss:3.3744 train_time:447682ms step_avg:141.99ms step:3164/3200 train_loss:3.4320 train_time:447822ms step_avg:141.99ms step:3165/3200 train_loss:3.2532 train_time:447961ms step_avg:141.98ms step:3166/3200 train_loss:3.2370 train_time:448102ms step_avg:141.98ms step:3167/3200 train_loss:3.2790 train_time:448241ms step_avg:141.98ms step:3168/3200 train_loss:3.0950 train_time:448382ms step_avg:141.98ms step:3169/3200 train_loss:3.2656 train_time:448521ms step_avg:141.98ms step:3170/3200 train_loss:3.4059 train_time:448662ms step_avg:141.98ms step:3171/3200 train_loss:3.4251 train_time:448802ms step_avg:141.98ms step:3172/3200 train_loss:3.3994 train_time:448942ms step_avg:141.98ms step:3173/3200 train_loss:3.3728 train_time:449082ms step_avg:141.98ms step:3174/3200 train_loss:3.3380 train_time:449222ms step_avg:141.98ms step:3175/3200 train_loss:3.3383 train_time:449362ms step_avg:141.98ms step:3176/3200 train_loss:3.3347 train_time:449502ms step_avg:141.98ms step:3177/3200 train_loss:3.2684 train_time:449642ms step_avg:141.98ms step:3178/3200 train_loss:3.3930 train_time:449782ms step_avg:141.98ms step:3179/3200 train_loss:3.4810 train_time:449921ms step_avg:141.98ms step:3180/3200 train_loss:3.3191 train_time:450066ms step_avg:141.98ms step:3181/3200 train_loss:3.3053 train_time:450203ms step_avg:141.98ms step:3182/3200 train_loss:3.3502 train_time:450342ms step_avg:141.97ms step:3183/3200 train_loss:3.4473 train_time:450482ms step_avg:141.97ms step:3184/3200 train_loss:3.4654 train_time:450622ms step_avg:141.97ms step:3185/3200 train_loss:3.3625 train_time:450762ms step_avg:141.97ms step:3186/3200 train_loss:3.4347 train_time:450902ms step_avg:141.97ms step:3187/3200 train_loss:3.4244 train_time:451043ms step_avg:141.97ms step:3188/3200 train_loss:3.2194 train_time:451183ms step_avg:141.97ms step:3189/3200 train_loss:3.3090 train_time:451322ms step_avg:141.97ms step:3190/3200 train_loss:3.3347 train_time:451462ms step_avg:141.97ms step:3191/3200 train_loss:3.3538 train_time:451603ms step_avg:141.97ms step:3192/3200 train_loss:3.3180 train_time:451742ms step_avg:141.97ms step:3193/3200 train_loss:3.2420 train_time:451882ms step_avg:141.97ms step:3194/3200 train_loss:4.2669 train_time:452022ms step_avg:141.97ms step:3195/3200 train_loss:3.3552 train_time:452163ms step_avg:141.97ms step:3196/3200 train_loss:3.1576 train_time:452304ms step_avg:141.97ms step:3197/3200 train_loss:3.3029 train_time:452443ms step_avg:141.97ms step:3198/3200 train_loss:3.1684 train_time:452584ms step_avg:141.96ms step:3199/3200 train_loss:3.3004 train_time:452724ms step_avg:141.96ms step:3200/3200 train_loss:3.2364 train_time:452863ms step_avg:141.96ms step:3200/3200 val_loss:3.3274 train_time:452918ms step_avg:141.98ms