==================================================================================================== import os import sys with open(sys.argv[0]) as f: code = f.read() # read the code of this file ASAP, for logging import uuid import glob import time from dataclasses import dataclass import numpy as np import torch from torch import nn import torch.nn.functional as F import torch.distributed as dist import torch._inductor.config as config from torch.nn.parallel import DistributedDataParallel as DDP # ----------------------------------------------------------------------------- # Muon optimizer def zeropower_via_svd(G, steps=None): U, S, V = G.svd() return U @ V.T @torch.compile def zeropower_via_newtonschulz5(G, steps=10, eps=1e-7): """ Newton-Schulz iteration to compute the zeroth power / orthogonalization of G. We opt to use a quintic iteration whose coefficients are selected to maximize the slope at zero. For the purpose of minimizing steps, it turns out to be empirically effective to keep increasing the slope at zero even beyond the point where the iteration no longer converges all the way to one everywhere on the interval. This iteration therefore does not produce UV^T but rather something like US'V^T where S' is diagonal with S_{ii}' \sim Uniform(0.5, 1.5), which turns out not to hurt model performance at all relative to UV^T, where USV^T = G is the SVD. 
""" assert len(G.shape) == 2 a, b, c = (3.4445, -4.7750, 2.0315) X = G.bfloat16() X /= (X.norm() + eps) # ensure top singular value <= 1 if G.size(0) > G.size(1): X = X.T for _ in range(steps): A = X @ X.T B = A @ X X = a * X + b * B + c * A @ B if G.size(0) > G.size(1): X = X.T return X zeropower_backends = dict(svd=zeropower_via_svd, newtonschulz5=zeropower_via_newtonschulz5) class Muon(torch.optim.Optimizer): """ Muon - MomentUm Orthogonalized by Newton-schulz Muon internally runs standard SGD-momentum, and then performs an orthogonalization post- processing step, in which each 2D parameter's update is replaced with the nearest orthogonal matrix. To efficiently orthogonalize each update, we use a Newton-Schulz iteration, which has the advantage that it can be stably run in bfloat16 on the GPU. Some warnings: - This optimizer assumes that all parameters passed in are 2D. - It should not be used for the embedding layer, the final fully connected layer, or any {0,1}-D parameters; those should all be optimized by a standard method (e.g., AdamW). - To use it with 4D convolutional filters, it works well to just flatten their last 3 dimensions. - We believe it is unlikely to work well for training with small batch size. - We believe it may not work well for finetuning pretrained models, but we haven't tested this. - We have not yet tried this optimizer for training scenarios larger than NanoGPT (124M). Arguments: lr: The learning rate used by the internal SGD. momentum: The momentum used by the internal SGD. nesterov: Whether to use Nesterov-style momentum in the internal SGD. (recommended) backend: The chosen backend for the orthogonalization step. (recommended: 'newtonschulz5') backend_steps: The number of iteration steps to use in the backend, if it is iterative. 
""" def __init__(self, params, lr=0.02, momentum=0.95, nesterov=True, backend='newtonschulz5', backend_steps=5): defaults = dict(lr=lr, momentum=momentum, nesterov=nesterov, backend=backend, backend_steps=backend_steps) super().__init__(params, defaults) def step(self): for group in self.param_groups: lr = group['lr'] momentum = group['momentum'] zeropower_backend = zeropower_backends[group['backend']] # generate weight updates in distributed fashion total_params = sum(p.numel() for p in group['params']) updates_flat = torch.zeros(total_params, device='cuda', dtype=torch.bfloat16) curr_idx = 0 for i, p in enumerate(group['params']): # luckily this will perfectly distribute a transformer with multiple of 4 layers to 8 GPUs if i % int(os.environ['WORLD_SIZE']) == int(os.environ['RANK']): g = p.grad assert g is not None state = self.state[p] if 'momentum_buffer' not in state: state['momentum_buffer'] = torch.zeros_like(g) buf = state['momentum_buffer'] buf.mul_(momentum).add_(g) if group['nesterov']: g = g.add(buf, alpha=momentum) g = zeropower_backend(g, steps=group['backend_steps']) g *= max(1, g.size(0)/g.size(1))**0.5 updates_flat[curr_idx:curr_idx+p.numel()] = g.flatten() curr_idx += p.numel() # sync updates across devices. 
we are not memory-constrained so can do this simple deserialization dist.all_reduce(updates_flat, op=dist.ReduceOp.SUM) # deserialize and apply updates curr_idx = 0 for p in group['params']: g = updates_flat[curr_idx:curr_idx+p.numel()].view_as(p.data).type_as(p.data) p.data.add_(g, alpha=-lr) curr_idx += p.numel() # ----------------------------------------------------------------------------- # PyTorch nn.Module definitions for the GPT-2 model class Rotary(torch.nn.Module): def __init__(self, dim, base=10000): super().__init__() self.inv_freq = 1.0 / (base ** (torch.arange(0, dim, 2).float() / dim)) self.seq_len_cached = None self.cos_cached = None self.sin_cached = None def forward(self, x): seq_len = x.shape[1] if seq_len != self.seq_len_cached: self.seq_len_cached = seq_len t = torch.arange(seq_len, device=x.device).type_as(self.inv_freq) freqs = torch.outer(t, self.inv_freq).to(x.device) self.cos_cached = freqs.cos().bfloat16() self.sin_cached = freqs.sin().bfloat16() return self.cos_cached[None, :, None, :], self.sin_cached[None, :, None, :] def apply_rotary_emb(x, cos, sin): assert x.ndim == 4 # multihead attention d = x.shape[3]//2 x1 = x[..., :d] x2 = x[..., d:] y1 = x1 * cos + x2 * sin y2 = x1 * (-sin) + x2 * cos return torch.cat([y1, y2], 3).type_as(x) class CausalSelfAttention(nn.Module): def __init__(self, config): super().__init__() self.n_head = config.n_head self.n_embd = config.n_embd self.head_dim = self.n_embd // self.n_head assert self.n_embd % self.n_head == 0 self.c_q = nn.Linear(self.n_embd, self.n_embd, bias=False) self.c_k = nn.Linear(self.n_embd, self.n_embd, bias=False) self.c_v = nn.Linear(self.n_embd, self.n_embd, bias=False) # output projection self.c_proj = nn.Linear(self.n_embd, self.n_embd, bias=False) self.c_proj.weight.data.zero_() # zero init suggested by @Grad62304977 self.rotary = Rotary(self.head_dim) self.lamb = nn.Parameter(torch.tensor(0.5)) # @Grad62304977 def forward(self, x, v1=None): B, T, C = x.size() # batch size, 
sequence length, embedding dimensionality (n_embd) q = self.c_q(x).view(B, T, self.n_head, self.head_dim) k = self.c_k(x).view(B, T, self.n_head, self.head_dim) v = self.c_v(x).view(B, T, self.n_head, self.head_dim) if v1 is None: v1 = v # This happens if we are in the first block. v needs to be accessed by subsequent blocks v = (1 - self.lamb) * v + self.lamb * v1.view_as(v) # @Grad62304977 cos, sin = self.rotary(q) q, k = F.rms_norm(q, (q.size(-1),)), F.rms_norm(k, (k.size(-1),)) # QK norm suggested by @Grad62304977 q, k = apply_rotary_emb(q, cos, sin), apply_rotary_emb(k, cos, sin) y = F.scaled_dot_product_attention(q.transpose(1, 2), k.transpose(1, 2), v.transpose(1, 2), is_causal=True) y = y.transpose(1, 2).contiguous().view_as(x) # re-assemble all head outputs side by side y = self.c_proj(y) return y, v1 class MLP(nn.Module): def __init__(self, config): super().__init__() self.c_fc = nn.Linear(config.n_embd, 4 * config.n_embd, bias=False) self.c_proj = nn.Linear(4 * config.n_embd, config.n_embd, bias=False) self.c_proj.weight.data.zero_() # zero init suggested by @Grad62304977 def forward(self, x): x = self.c_fc(x) x = F.relu(x).square() # https://arxiv.org/abs/2109.08668v2; ~1-2% better than GELU; suggested by @SKYLINEZ007 and @Grad62304977 x = self.c_proj(x) return x class Block(nn.Module): def __init__(self, config): super().__init__() self.attn = CausalSelfAttention(config) self.mlp = MLP(config) self.lambdas = nn.Parameter(torch.tensor([1., 0.])) def forward(self, x, v1, x0): x = self.lambdas[0] * x + self.lambdas[1] * x0 x1, v1 = self.attn(F.rms_norm(x, (x.size(-1),)), v1) x = x + x1 x = x + self.mlp(F.rms_norm(x, (x.size(-1),))) return x, v1 # ----------------------------------------------------------------------------- # The main GPT-2 model @dataclass class GPTConfig: vocab_size : int = 50304 n_layer : int = 12 n_head : int = 6 # head dim 128 suggested by @Grad62304977 n_embd : int = 768 class GPT(nn.Module): def __init__(self, config): 
super().__init__() self.config = config self.transformer = nn.ModuleDict(dict( wte = nn.Embedding(config.vocab_size, config.n_embd), h = nn.ModuleList([Block(config) for _ in range(config.n_layer)]), )) self.lm_head = nn.Linear(config.n_embd, config.vocab_size, bias=False) self.lm_head.weight.data.zero_() # @Grad62304977 def forward(self, idx, targets=None, return_logits=True): # forward the GPT model itself x = self.transformer.wte(idx) # token embeddings of shape (b, t, n_embd) x = F.rms_norm(x, (x.size(-1),)) # @Grad62304977 x0 = x v1 = None for block in self.transformer.h: x, v1 = block(x, v1, x0) x = F.rms_norm(x, (x.size(-1),)) if targets is not None: # if we are given some desired targets also calculate the loss logits = self.lm_head(x) logits = logits.float() # use tf32/fp32 for logits loss = F.cross_entropy(logits.view(-1, logits.size(-1)), targets.view(-1), ignore_index=-1) else: # inference-time mini-optimization: only forward the lm_head on the very last position logits = self.lm_head(x[:, [-1], :]) # note: using list [-1] to preserve the time dim logits = logits.float() # use tf32/fp32 for logits loss = None # there are performance reasons why not returning logits is prudent, if not needed if not return_logits: logits = None return logits, loss # ----------------------------------------------------------------------------- # Our own simple Distributed Data Loader def _peek_data_shard(filename): # only reads the header, returns header data with open(filename, "rb") as f: # first read the header, which is 256 int32 integers (4 bytes each) header = np.frombuffer(f.read(256*4), dtype=np.int32) if header[0] != 20240520: print("ERROR: magic number mismatch in the data .bin file!") print("---> HINT: Are you passing in a correct file with --input_bin?") print("---> HINT: Dataset encoding changed recently, re-run data prepro or refer again to README") print("---> HINT: For example re-run: `python dev/data/tinyshakespeare.py`, then re-try") exit(1) assert 
header[1] == 1, "unsupported version" ntok = header[2] # number of tokens (claimed) return ntok # for now just return the number of tokens def _load_data_shard(filename): with open(filename, "rb") as f: # first read the header, which is 256 int32 integers (4 bytes each) header = np.frombuffer(f.read(256*4), dtype=np.int32) assert header[0] == 20240520, "magic number mismatch in the data .bin file" assert header[1] == 1, "unsupported version" ntok = header[2] # number of tokens (claimed) # the rest of it are tokens, stored as uint16 tokens = np.frombuffer(f.read(), dtype=np.uint16) assert len(tokens) == ntok, "number of tokens read does not match header?" return tokens class DistributedDataLoader: def __init__(self, filename_pattern, B, T, process_rank, num_processes): self.process_rank = process_rank self.num_processes = num_processes self.B = B self.T = T # glob files that match the pattern self.files = sorted(glob.glob(filename_pattern)) assert len(self.files) > 0, f"did not find any files that match the pattern {filename_pattern}" # load and validate all data shards, count number of tokens in total ntok_total = 0 for fname in self.files: shard_ntok = _peek_data_shard(fname) assert shard_ntok >= num_processes * B * T + 1 ntok_total += int(shard_ntok) self.ntok_total = ntok_total # kick things off self.reset() def reset(self): self.current_shard = 0 self.current_position = self.process_rank * self.B * self.T self.tokens = _load_data_shard(self.files[self.current_shard]) def advance(self): # advance to next data shard self.current_shard = (self.current_shard + 1) % len(self.files) self.current_position = self.process_rank * self.B * self.T self.tokens = _load_data_shard(self.files[self.current_shard]) def next_batch(self): B = self.B T = self.T buf = self.tokens[self.current_position : self.current_position+B*T+1] buf = torch.tensor(buf.astype(np.int32), dtype=torch.long) x = (buf[:-1]).view(B, T) # inputs y = (buf[1:]).view(B, T) # targets # advance current 
position and load next shard if necessary self.current_position += B * T * self.num_processes if self.current_position + (B * T * self.num_processes + 1) > len(self.tokens): self.advance() return x.cuda(), y.cuda() # ----------------------------------------------------------------------------- # int main @dataclass class Hyperparameters: # data hyperparams input_bin : str = 'data/fineweb10B/fineweb_train_*.bin' # input .bin to train on input_val_bin : str = 'data/fineweb10B/fineweb_val_*.bin' # input .bin to eval validation loss on # optimization hyperparams batch_size : int = 8*64 # batch size, in sequences, across all devices device_batch_size : int = 64 # batch size, in sequences, per device sequence_length : int = 1024 # sequence length, in tokens num_iterations : int = 3200 # number of iterations to run warmup_iters : int = 0 warmdown_iters : int = 914 # number of iterations of linear warmup/warmdown for triangular or trapezoidal schedule weight_decay : float = 0 # evaluation and logging hyperparams val_loss_every : int = 125 # every how many steps to evaluate val loss? 0 for only at the end val_tokens : int = 10485760 # how many tokens of validation data? it's important to keep this fixed for consistent comparisons save_every : int = 0 # every how many steps to save the checkpoint? 0 for only at the end args = Hyperparameters() # set up DDP (distributed data parallel). torchrun sets this env variable assert torch.cuda.is_available() dist.init_process_group(backend='nccl') ddp_rank = int(os.environ['RANK']) ddp_local_rank = int(os.environ['LOCAL_RANK']) ddp_world_size = int(os.environ['WORLD_SIZE']) device = f'cuda:{ddp_local_rank}' torch.cuda.set_device(device) print(f"using device: {device}") master_process = (ddp_rank == 0) # this process will do logging, checkpointing etc. # convenience variables B, T = args.device_batch_size, args.sequence_length # calculate the number of steps to take in the val loop. 
# The validation token budget must split evenly into per-step global batches
# (B sequences of T tokens on each of ddp_world_size ranks).
assert args.val_tokens % (B * T * ddp_world_size) == 0
val_steps = args.val_tokens // (B * T * ddp_world_size)
# calculate the steps of gradient accumulation required to attain the desired global batch size.
assert args.batch_size % (B * ddp_world_size) == 0
train_accumulation_steps = args.batch_size // (B * ddp_world_size)

# load tokens
train_loader = DistributedDataLoader(args.input_bin, B, T, ddp_rank, ddp_world_size)
val_loader = DistributedDataLoader(args.input_val_bin, B, T, ddp_rank, ddp_world_size)
if master_process:
    print(f"Training DataLoader: total number of tokens: {train_loader.ntok_total} across {len(train_loader.files)} files")
    print(f"Validation DataLoader: total number of tokens: {val_loader.ntok_total} across {len(val_loader.files)} files")
# NOTE(review): this batch is replaced before use, because train_loader.reset()
# is called just before the training loop and the loop fetches the next batch
# after each forward pass -- except that x, y from here feed the very first forward.
x, y = train_loader.next_batch()

# there are only 50257 unique GPT-2 tokens; we extend to nearest multiple of 128 for efficiency. suggested to me by @Grad62304977.
# this originates from Karpathy's experiments.
num_vocab = 50304
model = GPT(GPTConfig(vocab_size=num_vocab, n_layer=12, n_head=6, n_embd=768))
model = model.cuda()
if hasattr(config, "coordinate_descent_tuning"):
    config.coordinate_descent_tuning = True # suggested by @Chillee
model = torch.compile(model)
# here we wrap model into DDP container
model = DDP(model, device_ids=[ddp_local_rank])
raw_model = model.module # always contains the "raw" unwrapped model
ctx = torch.amp.autocast(device_type='cuda', dtype=torch.bfloat16)

# CUDNN attention is ~4ms faster than Flash, but doesn't get selected by default in PyTorch 2.5.1
from torch.backends.cuda import enable_cudnn_sdp, enable_flash_sdp, enable_math_sdp, enable_mem_efficient_sdp
enable_cudnn_sdp(True)
enable_flash_sdp(False)
enable_mem_efficient_sdp(False)
enable_math_sdp(False)

# init the optimizer(s)
# Four optimizers over disjoint parameter sets: Adam for the embedding, Adam for
# the LM head, Muon for the 2D parameters inside the blocks, and Adam for the
# remaining (<2D) block parameters.
optimizer1 = torch.optim.Adam([raw_model.transformer.wte.weight], lr=0.3, betas=(0.9, 0.95), fused=True)
optimizer2 = torch.optim.Adam([raw_model.lm_head.weight], lr=0.002, betas=(0.9, 0.95), fused=True)
params = list(raw_model.transformer.h.parameters())
matrix_params = [p for p in params if p.ndim == 2]
scalar_params = [p for p in params if p.ndim < 2]
optimizer3 = Muon(matrix_params, lr=0.02, momentum=0.95)
optimizer4 = torch.optim.Adam(scalar_params, lr=0.02, betas=(0.9, 0.95), fused=True) # note that this learning rate is neither sensitive nor tuned
optimizers = [optimizer1, optimizer2, optimizer3, optimizer4]
# learning rate decay scheduler (linear warmup and warmdown)
# get_lr returns a multiplier on each optimizer's base lr (it is passed to LambdaLR).
def get_lr(it):
    assert it <= args.num_iterations
    # 1) linear warmup for warmup_iters steps
    if it < args.warmup_iters:
        return (it+1) / args.warmup_iters
    # 2) constant lr for a while
    elif it < args.num_iterations - args.warmdown_iters:
        return 1.0
    # 3) linear warmdown
    else:
        # reaches 0 at it == num_iterations
        decay_ratio = (args.num_iterations - it) / args.warmdown_iters
        return decay_ratio
schedulers = [torch.optim.lr_scheduler.LambdaLR(opt, get_lr) for opt in optimizers]

# begin logging
# run_id and logfile are only defined on the master process; every later use is
# guarded by master_process.
if master_process:
    run_id = str(uuid.uuid4())
    logdir = 'logs/%s/' % run_id
    os.makedirs(logdir, exist_ok=True)
    logfile = 'logs/%s.txt' % run_id
    # create the log file
    with open(logfile, "w") as f:
        # begin the log by printing this file (the Python code)
        f.write('='*100 + '\n')
        f.write(code)
        f.write('='*100 + '\n')
        # log information about the hardware/software environment this is running on
        # and print the full `nvidia-smi` to file
        f.write(f"Running pytorch {torch.version.__version__} compiled for CUDA {torch.version.cuda}\nnvidia-smi:\n")
        import subprocess
        result = subprocess.run(['nvidia-smi'], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
        f.write(f'{result.stdout}\n')
        f.write('='*100 + '\n')

# accumulated wall-clock training time; validation and checkpointing are
# excluded by stopping and restarting the clock (t0) around them
training_time_ms = 0
# start the clock
torch.cuda.synchronize()
t0 = time.time()
# begin training
train_loader.reset()
for step in range(args.num_iterations + 1):
    last_step = (step == args.num_iterations)
    # This effectively ignores timing first 10 steps, which are slower for weird reasons.
    # Alternately, and slightly more correctly in terms of benchmarking, we could do 10
    # steps with dummy data first, and then re-initialize the model and reset the loader.
    if step == 10:
        training_time_ms = 0
        t0 = time.time()
    # nan makes step_avg print as "nan" until enough timed steps have elapsed
    timed_steps = float('nan') if step <= 11 else (step - 10) + 1 # <= 11 to avoid bug in val

    # once in a while evaluate the validation dataset
    if (last_step or (args.val_loss_every > 0 and step % args.val_loss_every == 0)):
        # stop the clock
        torch.cuda.synchronize()
        training_time_ms += 1000 * (time.time() - t0)
        # run validation batches
        model.eval()
        val_loader.reset()
        val_loss = 0.0
        for _ in range(val_steps):
            x_val, y_val = val_loader.next_batch()
            with ctx: # of course, we'd like to use no_grad() here too, but that creates a torch.compile error for some reason
                _, loss = model(x_val, y_val, return_logits=False)
                val_loss += loss.detach()
                del loss
        # average across ranks, then across validation steps
        dist.all_reduce(val_loss, op=dist.ReduceOp.AVG)
        val_loss /= val_steps
        # log val loss to console and to logfile
        if master_process:
            print(f'step:{step}/{args.num_iterations} val_loss:{val_loss:.4f} train_time:{training_time_ms:.0f}ms step_avg:{training_time_ms/(timed_steps-1):.2f}ms')
            with open(logfile, "a") as f:
                f.write(f'step:{step}/{args.num_iterations} val_loss:{val_loss:.4f} train_time:{training_time_ms:.0f}ms step_avg:{training_time_ms/(timed_steps-1):.2f}ms\n')
        # start the clock again
        torch.cuda.synchronize()
        t0 = time.time()

    if master_process and (last_step or (args.save_every > 0 and step % args.save_every == 0)):
        # stop the clock
        torch.cuda.synchronize()
        training_time_ms += 1000 * (time.time() - t0)
        # save the state of the training process
        log = dict(step=step, code=code, model=raw_model.state_dict(), optimizers=[opt.state_dict() for opt in optimizers])
        torch.save(log, 'logs/%s/state_step%06d.pt' % (run_id, step))
        # start the clock again
        torch.cuda.synchronize()
        t0 = time.time()

    # bit confusing: we want to make sure to eval on 0th iteration
    # but also after the very last iteration. so we loop for step <= num_iterations
    # instead of just < num_iterations (one extra due to <=), only to do
    # the validation/sampling one last time, and then we break right here as we're done.
    if last_step:
        break

    # --------------- TRAINING SECTION BEGIN -----------------
    model.train()
    for i in range(1, train_accumulation_steps+1):
        # forward pass
        with ctx:
            _, loss = model(x, y, return_logits=False)
            # only the last micro-batch's loss survives the loop and gets logged
            train_loss = loss.detach()
        # advance the dataset for the next batch
        x, y = train_loader.next_batch()
        # backward pass
        if i < train_accumulation_steps:
            with model.no_sync(): # there's no need to sync gradients every accumulation step
                loss.backward()
        else:
            loss.backward() # just sync on the last step
    # gradients were summed over micro-batches; turn the sum into a mean
    for p in model.parameters():
        p.grad /= train_accumulation_steps
    # step the optimizers and schedulers
    for opt, sched in zip(optimizers, schedulers):
        opt.step()
        sched.step()
    # null the gradients
    model.zero_grad(set_to_none=True)
    # --------------- TRAINING SECTION END -------------------
    # everything that follows now is just diagnostics, prints, logging, etc.

    #dist.all_reduce(train_loss, op=dist.ReduceOp.AVG) # all-reducing the training loss would be more correct in terms of logging, but slower
    if master_process:
        # "approx" because the clock is read without a torch.cuda.synchronize() here
        approx_time = training_time_ms + 1000 * (time.time() - t0)
        print(f"step:{step+1}/{args.num_iterations} train_loss:{train_loss.item():.4f} train_time:{approx_time:.0f}ms step_avg:{approx_time/timed_steps:.2f}ms")
        with open(logfile, "a") as f:
            f.write(f"step:{step+1}/{args.num_iterations} train_loss:{train_loss.item():.4f} train_time:{approx_time:.0f}ms step_avg:{approx_time/timed_steps:.2f}ms\n")

if master_process:
    print(f"peak memory consumption: {torch.cuda.max_memory_allocated() // 1024 // 1024} MiB")

# -------------------------------------------------------------------------
# clean up nice
dist.destroy_process_group()
==================================================================================================== Running pytorch 2.5.1+cu124 compiled for CUDA 12.4 nvidia-smi: Wed Nov 6 20:15:05 2024 +-----------------------------------------------------------------------------------------+ | NVIDIA-SMI 555.42.06 Driver Version: 555.42.06 CUDA Version: 12.5 | |-----------------------------------------+------------------------+----------------------+ | GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC | | Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. | | | | MIG M. 
| |=========================================+========================+======================| | 0 NVIDIA H100 80GB HBM3 Off | 00000000:18:00.0 Off | 0 | | N/A 35C P0 141W / 700W | 5304MiB / 81559MiB | 3% Default | | | | Disabled | +-----------------------------------------+------------------------+----------------------+ | 1 NVIDIA H100 80GB HBM3 Off | 00000000:2A:00.0 Off | 0 | | N/A 38C P0 133W / 700W | 5352MiB / 81559MiB | 3% Default | | | | Disabled | +-----------------------------------------+------------------------+----------------------+ | 2 NVIDIA H100 80GB HBM3 Off | 00000000:3A:00.0 Off | 0 | | N/A 38C P0 127W / 700W | 5352MiB / 81559MiB | 3% Default | | | | Disabled | +-----------------------------------------+------------------------+----------------------+ | 3 NVIDIA H100 80GB HBM3 Off | 00000000:5D:00.0 Off | 0 | | N/A 34C P0 138W / 700W | 5352MiB / 81559MiB | 0% Default | | | | Disabled | +-----------------------------------------+------------------------+----------------------+ | 4 NVIDIA H100 80GB HBM3 Off | 00000000:9A:00.0 Off | 0 | | N/A 35C P0 142W / 700W | 5352MiB / 81559MiB | 2% Default | | | | Disabled | +-----------------------------------------+------------------------+----------------------+ | 5 NVIDIA H100 80GB HBM3 Off | 00000000:AB:00.0 Off | 0 | | N/A 40C P0 145W / 700W | 5352MiB / 81559MiB | 1% Default | | | | Disabled | +-----------------------------------------+------------------------+----------------------+ | 6 NVIDIA H100 80GB HBM3 Off | 00000000:BA:00.0 Off | 0 | | N/A 38C P0 144W / 700W | 5352MiB / 81559MiB | 4% Default | | | | Disabled | +-----------------------------------------+------------------------+----------------------+ | 7 NVIDIA H100 80GB HBM3 Off | 00000000:DB:00.0 Off | 0 | | N/A 35C P0 147W / 700W | 5112MiB / 81559MiB | 0% Default | | | | Disabled | +-----------------------------------------+------------------------+----------------------+ 
+-----------------------------------------------------------------------------------------+ | Processes: | | GPU GI CI PID Type Process name GPU Memory | | ID ID Usage | |=========================================================================================| | 0 N/A N/A 33883 C /usr/bin/python3 0MiB | | 1 N/A N/A 33884 C /usr/bin/python3 0MiB | | 2 N/A N/A 33885 C /usr/bin/python3 0MiB | | 3 N/A N/A 33886 C /usr/bin/python3 0MiB | | 4 N/A N/A 33887 C /usr/bin/python3 0MiB | | 5 N/A N/A 33888 C /usr/bin/python3 0MiB | | 6 N/A N/A 33889 C /usr/bin/python3 0MiB | | 7 N/A N/A 33890 C /usr/bin/python3 0MiB | +-----------------------------------------------------------------------------------------+ ==================================================================================================== step:0/3200 val_loss:10.8258 train_time:515ms step_avg:nanms step:1/3200 train_loss:10.8258 train_time:3650ms step_avg:nanms step:2/3200 train_loss:10.4276 train_time:3755ms step_avg:nanms step:3/3200 train_loss:9.9528 train_time:3899ms step_avg:nanms step:4/3200 train_loss:9.0457 train_time:4046ms step_avg:nanms step:5/3200 train_loss:8.0574 train_time:4194ms step_avg:nanms step:6/3200 train_loss:7.5265 train_time:4341ms step_avg:nanms step:7/3200 train_loss:7.0098 train_time:4490ms step_avg:nanms step:8/3200 train_loss:7.2995 train_time:4639ms step_avg:nanms step:9/3200 train_loss:6.9067 train_time:4791ms step_avg:nanms step:10/3200 train_loss:6.7998 train_time:4942ms step_avg:nanms step:11/3200 train_loss:6.7971 train_time:102ms step_avg:nanms step:12/3200 train_loss:6.6835 train_time:251ms step_avg:nanms step:13/3200 train_loss:6.5164 train_time:400ms step_avg:133.19ms step:14/3200 train_loss:6.5190 train_time:548ms step_avg:137.03ms step:15/3200 train_loss:6.4851 train_time:698ms step_avg:139.55ms step:16/3200 train_loss:6.4405 train_time:849ms step_avg:141.46ms step:17/3200 train_loss:6.4428 train_time:1000ms step_avg:142.86ms step:18/3200 train_loss:6.4792 
train_time:1149ms step_avg:143.68ms step:19/3200 train_loss:6.3196 train_time:1299ms step_avg:144.33ms step:20/3200 train_loss:6.3393 train_time:1447ms step_avg:144.74ms step:21/3200 train_loss:6.0505 train_time:1596ms step_avg:145.12ms step:22/3200 train_loss:6.3755 train_time:1747ms step_avg:145.59ms step:23/3200 train_loss:6.6268 train_time:1898ms step_avg:146.00ms step:24/3200 train_loss:6.2713 train_time:2049ms step_avg:146.36ms step:25/3200 train_loss:6.4297 train_time:2199ms step_avg:146.63ms step:26/3200 train_loss:6.1401 train_time:2349ms step_avg:146.83ms step:27/3200 train_loss:6.0553 train_time:2499ms step_avg:147.02ms step:28/3200 train_loss:6.2325 train_time:2649ms step_avg:147.17ms step:29/3200 train_loss:5.8930 train_time:2798ms step_avg:147.27ms step:30/3200 train_loss:6.1561 train_time:2950ms step_avg:147.49ms step:31/3200 train_loss:5.9989 train_time:3102ms step_avg:147.70ms step:32/3200 train_loss:5.9638 train_time:3251ms step_avg:147.78ms step:33/3200 train_loss:5.8048 train_time:3401ms step_avg:147.87ms step:34/3200 train_loss:6.1083 train_time:3551ms step_avg:147.95ms step:35/3200 train_loss:6.0258 train_time:3700ms step_avg:148.02ms step:36/3200 train_loss:6.1826 train_time:3850ms step_avg:148.06ms step:37/3200 train_loss:6.0936 train_time:4001ms step_avg:148.18ms step:38/3200 train_loss:5.9884 train_time:4152ms step_avg:148.27ms step:39/3200 train_loss:5.8808 train_time:4303ms step_avg:148.37ms step:40/3200 train_loss:5.9063 train_time:4452ms step_avg:148.40ms step:41/3200 train_loss:5.8273 train_time:4604ms step_avg:148.52ms step:42/3200 train_loss:5.8375 train_time:4755ms step_avg:148.59ms step:43/3200 train_loss:5.7453 train_time:4906ms step_avg:148.68ms step:44/3200 train_loss:5.8076 train_time:5058ms step_avg:148.76ms step:45/3200 train_loss:5.7977 train_time:5209ms step_avg:148.84ms step:46/3200 train_loss:5.9489 train_time:5360ms step_avg:148.90ms step:47/3200 train_loss:5.7485 train_time:5510ms step_avg:148.91ms step:48/3200 
train_loss:5.6056 train_time:5660ms step_avg:148.96ms step:49/3200 train_loss:5.8074 train_time:5810ms step_avg:148.98ms step:50/3200 train_loss:5.6822 train_time:5960ms step_avg:148.99ms step:51/3200 train_loss:5.8401 train_time:6110ms step_avg:149.01ms step:52/3200 train_loss:5.7009 train_time:6261ms step_avg:149.08ms step:53/3200 train_loss:5.5531 train_time:6411ms step_avg:149.08ms step:54/3200 train_loss:5.6723 train_time:6561ms step_avg:149.12ms step:55/3200 train_loss:5.5591 train_time:6710ms step_avg:149.12ms step:56/3200 train_loss:5.8843 train_time:6861ms step_avg:149.15ms step:57/3200 train_loss:5.5360 train_time:7010ms step_avg:149.14ms step:58/3200 train_loss:5.4245 train_time:7161ms step_avg:149.19ms step:59/3200 train_loss:5.5643 train_time:7310ms step_avg:149.18ms step:60/3200 train_loss:5.5095 train_time:7461ms step_avg:149.21ms step:61/3200 train_loss:5.6186 train_time:7610ms step_avg:149.21ms step:62/3200 train_loss:5.3837 train_time:7761ms step_avg:149.25ms step:63/3200 train_loss:5.4794 train_time:7911ms step_avg:149.26ms step:64/3200 train_loss:5.4460 train_time:8061ms step_avg:149.28ms step:65/3200 train_loss:5.2782 train_time:8212ms step_avg:149.31ms step:66/3200 train_loss:5.2727 train_time:8364ms step_avg:149.35ms step:67/3200 train_loss:5.4283 train_time:8513ms step_avg:149.36ms step:68/3200 train_loss:5.2820 train_time:8666ms step_avg:149.41ms step:69/3200 train_loss:5.5313 train_time:8814ms step_avg:149.40ms step:70/3200 train_loss:5.1887 train_time:8966ms step_avg:149.43ms step:71/3200 train_loss:5.2644 train_time:9115ms step_avg:149.42ms step:72/3200 train_loss:5.4213 train_time:9266ms step_avg:149.45ms step:73/3200 train_loss:5.3420 train_time:9415ms step_avg:149.44ms step:74/3200 train_loss:5.2385 train_time:9566ms step_avg:149.47ms step:75/3200 train_loss:5.3461 train_time:9715ms step_avg:149.47ms step:76/3200 train_loss:5.3295 train_time:9866ms step_avg:149.48ms step:77/3200 train_loss:5.2639 train_time:10017ms step_avg:149.51ms 
step:78/3200 train_loss:5.3641 train_time:10168ms step_avg:149.53ms step:79/3200 train_loss:5.4751 train_time:10319ms step_avg:149.54ms step:80/3200 train_loss:5.2328 train_time:10470ms step_avg:149.56ms step:81/3200 train_loss:5.3000 train_time:10621ms step_avg:149.59ms step:82/3200 train_loss:5.0600 train_time:10769ms step_avg:149.57ms step:83/3200 train_loss:5.2479 train_time:10920ms step_avg:149.59ms step:84/3200 train_loss:5.1874 train_time:11069ms step_avg:149.59ms step:85/3200 train_loss:5.1739 train_time:11219ms step_avg:149.59ms step:86/3200 train_loss:5.0397 train_time:11369ms step_avg:149.59ms step:87/3200 train_loss:5.2362 train_time:11520ms step_avg:149.61ms step:88/3200 train_loss:5.1368 train_time:11669ms step_avg:149.60ms step:89/3200 train_loss:5.2038 train_time:11820ms step_avg:149.62ms step:90/3200 train_loss:5.1787 train_time:11969ms step_avg:149.61ms step:91/3200 train_loss:5.0784 train_time:12119ms step_avg:149.62ms step:92/3200 train_loss:5.0806 train_time:12270ms step_avg:149.63ms step:93/3200 train_loss:5.1881 train_time:12420ms step_avg:149.64ms step:94/3200 train_loss:5.0211 train_time:12569ms step_avg:149.63ms step:95/3200 train_loss:5.0249 train_time:12719ms step_avg:149.64ms step:96/3200 train_loss:5.0711 train_time:12869ms step_avg:149.64ms step:97/3200 train_loss:4.9695 train_time:13020ms step_avg:149.66ms step:98/3200 train_loss:5.0484 train_time:13169ms step_avg:149.65ms step:99/3200 train_loss:4.9727 train_time:13320ms step_avg:149.67ms step:100/3200 train_loss:5.0854 train_time:13470ms step_avg:149.66ms step:101/3200 train_loss:5.0514 train_time:13619ms step_avg:149.66ms step:102/3200 train_loss:4.9390 train_time:13768ms step_avg:149.65ms step:103/3200 train_loss:5.0605 train_time:13919ms step_avg:149.67ms step:104/3200 train_loss:5.0106 train_time:14069ms step_avg:149.67ms step:105/3200 train_loss:4.8760 train_time:14218ms step_avg:149.67ms step:106/3200 train_loss:4.9367 train_time:14369ms step_avg:149.68ms step:107/3200 
train_loss:5.1250 train_time:14520ms step_avg:149.69ms step:108/3200 train_loss:4.9200 train_time:14669ms step_avg:149.68ms step:109/3200 train_loss:4.7325 train_time:14819ms step_avg:149.69ms step:110/3200 train_loss:4.8961 train_time:14969ms step_avg:149.69ms step:111/3200 train_loss:4.8879 train_time:15119ms step_avg:149.69ms step:112/3200 train_loss:4.8408 train_time:15269ms step_avg:149.70ms step:113/3200 train_loss:4.9638 train_time:15420ms step_avg:149.71ms step:114/3200 train_loss:4.8735 train_time:15570ms step_avg:149.71ms step:115/3200 train_loss:4.7354 train_time:15720ms step_avg:149.72ms step:116/3200 train_loss:4.8887 train_time:15869ms step_avg:149.71ms step:117/3200 train_loss:4.8075 train_time:16020ms step_avg:149.72ms step:118/3200 train_loss:4.7469 train_time:16169ms step_avg:149.71ms step:119/3200 train_loss:4.9054 train_time:16319ms step_avg:149.72ms step:120/3200 train_loss:4.8405 train_time:16469ms step_avg:149.72ms step:121/3200 train_loss:4.7617 train_time:16620ms step_avg:149.73ms step:122/3200 train_loss:4.6676 train_time:16769ms step_avg:149.72ms step:123/3200 train_loss:4.7918 train_time:16919ms step_avg:149.73ms step:124/3200 train_loss:4.6485 train_time:17069ms step_avg:149.73ms step:125/3200 train_loss:4.9572 train_time:17219ms step_avg:149.73ms step:125/3200 val_loss:4.7794 train_time:17264ms step_avg:150.13ms step:126/3200 train_loss:4.8217 train_time:17376ms step_avg:149.80ms step:127/3200 train_loss:4.7738 train_time:17528ms step_avg:149.81ms step:128/3200 train_loss:4.8165 train_time:17677ms step_avg:149.80ms step:129/3200 train_loss:4.7114 train_time:17826ms step_avg:149.80ms step:130/3200 train_loss:5.0027 train_time:17976ms step_avg:149.80ms step:131/3200 train_loss:4.7494 train_time:18125ms step_avg:149.79ms step:132/3200 train_loss:4.7532 train_time:18277ms step_avg:149.81ms step:133/3200 train_loss:4.6922 train_time:18428ms step_avg:149.82ms step:134/3200 train_loss:4.7497 train_time:18579ms step_avg:149.83ms step:135/3200 
train_loss:4.6264 train_time:18729ms step_avg:149.83ms step:136/3200 train_loss:4.7559 train_time:18881ms step_avg:149.85ms step:137/3200 train_loss:4.5442 train_time:19028ms step_avg:149.82ms step:138/3200 train_loss:4.6972 train_time:19178ms step_avg:149.83ms step:139/3200 train_loss:4.6321 train_time:19327ms step_avg:149.82ms step:140/3200 train_loss:4.6812 train_time:19479ms step_avg:149.84ms step:141/3200 train_loss:4.7385 train_time:19629ms step_avg:149.84ms step:142/3200 train_loss:4.6216 train_time:19779ms step_avg:149.84ms step:143/3200 train_loss:4.6521 train_time:19927ms step_avg:149.83ms step:144/3200 train_loss:4.5263 train_time:20077ms step_avg:149.83ms step:145/3200 train_loss:4.6471 train_time:20225ms step_avg:149.82ms step:146/3200 train_loss:4.6005 train_time:20376ms step_avg:149.83ms step:147/3200 train_loss:4.4851 train_time:20527ms step_avg:149.83ms step:148/3200 train_loss:4.6255 train_time:20678ms step_avg:149.84ms step:149/3200 train_loss:4.6381 train_time:20826ms step_avg:149.83ms step:150/3200 train_loss:4.6230 train_time:20977ms step_avg:149.84ms step:151/3200 train_loss:4.6901 train_time:21125ms step_avg:149.82ms step:152/3200 train_loss:4.5492 train_time:21276ms step_avg:149.83ms step:153/3200 train_loss:4.5552 train_time:21426ms step_avg:149.83ms step:154/3200 train_loss:4.6455 train_time:21577ms step_avg:149.84ms step:155/3200 train_loss:4.6061 train_time:21727ms step_avg:149.84ms step:156/3200 train_loss:4.5344 train_time:21878ms step_avg:149.85ms step:157/3200 train_loss:4.5849 train_time:22027ms step_avg:149.84ms step:158/3200 train_loss:4.6693 train_time:22177ms step_avg:149.84ms step:159/3200 train_loss:4.4840 train_time:22326ms step_avg:149.84ms step:160/3200 train_loss:4.5504 train_time:22479ms step_avg:149.86ms step:161/3200 train_loss:4.3754 train_time:22628ms step_avg:149.86ms step:162/3200 train_loss:4.5656 train_time:22780ms step_avg:149.87ms step:163/3200 train_loss:4.5752 train_time:22928ms step_avg:149.86ms 
step:164/3200 train_loss:4.5674 train_time:23080ms step_avg:149.87ms step:165/3200 train_loss:4.4112 train_time:23228ms step_avg:149.86ms step:166/3200 train_loss:4.5009 train_time:23379ms step_avg:149.86ms step:167/3200 train_loss:4.5863 train_time:23528ms step_avg:149.86ms step:168/3200 train_loss:4.4166 train_time:23678ms step_avg:149.86ms step:169/3200 train_loss:4.5003 train_time:23826ms step_avg:149.85ms step:170/3200 train_loss:4.3859 train_time:23976ms step_avg:149.85ms step:171/3200 train_loss:4.2651 train_time:24125ms step_avg:149.85ms step:172/3200 train_loss:4.4066 train_time:24275ms step_avg:149.85ms step:173/3200 train_loss:4.4144 train_time:24425ms step_avg:149.84ms step:174/3200 train_loss:4.4575 train_time:24576ms step_avg:149.85ms step:175/3200 train_loss:4.6160 train_time:24724ms step_avg:149.84ms step:176/3200 train_loss:4.4495 train_time:24875ms step_avg:149.85ms step:177/3200 train_loss:4.3112 train_time:25025ms step_avg:149.85ms step:178/3200 train_loss:4.2716 train_time:25175ms step_avg:149.85ms step:179/3200 train_loss:4.3692 train_time:25325ms step_avg:149.85ms step:180/3200 train_loss:4.3384 train_time:25476ms step_avg:149.86ms step:181/3200 train_loss:4.3175 train_time:25625ms step_avg:149.85ms step:182/3200 train_loss:4.4772 train_time:25776ms step_avg:149.86ms step:183/3200 train_loss:4.3552 train_time:25926ms step_avg:149.86ms step:184/3200 train_loss:4.3298 train_time:26076ms step_avg:149.86ms step:185/3200 train_loss:4.3196 train_time:26225ms step_avg:149.86ms step:186/3200 train_loss:4.4076 train_time:26376ms step_avg:149.86ms step:187/3200 train_loss:4.3600 train_time:26526ms step_avg:149.86ms step:188/3200 train_loss:4.4548 train_time:26676ms step_avg:149.86ms step:189/3200 train_loss:4.3641 train_time:26968ms step_avg:150.66ms step:190/3200 train_loss:4.2894 train_time:27305ms step_avg:151.69ms step:191/3200 train_loss:4.3952 train_time:27450ms step_avg:151.66ms step:192/3200 train_loss:4.2634 train_time:27601ms 
step_avg:151.65ms step:193/3200 train_loss:4.2033 train_time:27748ms step_avg:151.63ms step:194/3200 train_loss:4.4188 train_time:27899ms step_avg:151.62ms step:195/3200 train_loss:4.3332 train_time:28046ms step_avg:151.60ms step:196/3200 train_loss:4.5358 train_time:28198ms step_avg:151.60ms step:197/3200 train_loss:4.3815 train_time:28350ms step_avg:151.60ms step:198/3200 train_loss:4.2234 train_time:28500ms step_avg:151.60ms step:199/3200 train_loss:4.3405 train_time:28649ms step_avg:151.58ms step:200/3200 train_loss:4.2040 train_time:28799ms step_avg:151.58ms step:201/3200 train_loss:4.2916 train_time:28947ms step_avg:151.55ms step:202/3200 train_loss:4.1805 train_time:29098ms step_avg:151.55ms step:203/3200 train_loss:4.4057 train_time:29249ms step_avg:151.55ms step:204/3200 train_loss:4.2381 train_time:29400ms step_avg:151.54ms step:205/3200 train_loss:4.3456 train_time:29548ms step_avg:151.53ms step:206/3200 train_loss:4.4060 train_time:29699ms step_avg:151.53ms step:207/3200 train_loss:4.1078 train_time:29848ms step_avg:151.51ms step:208/3200 train_loss:4.2494 train_time:29999ms step_avg:151.51ms step:209/3200 train_loss:4.2435 train_time:30147ms step_avg:151.49ms step:210/3200 train_loss:4.3973 train_time:30299ms step_avg:151.49ms step:211/3200 train_loss:4.3331 train_time:30448ms step_avg:151.48ms step:212/3200 train_loss:4.2174 train_time:30599ms step_avg:151.48ms step:213/3200 train_loss:4.2532 train_time:30748ms step_avg:151.47ms step:214/3200 train_loss:4.1933 train_time:30898ms step_avg:151.46ms step:215/3200 train_loss:4.2629 train_time:31046ms step_avg:151.45ms step:216/3200 train_loss:4.0886 train_time:31197ms step_avg:151.44ms step:217/3200 train_loss:4.1586 train_time:31346ms step_avg:151.43ms step:218/3200 train_loss:4.1637 train_time:31498ms step_avg:151.43ms step:219/3200 train_loss:4.2288 train_time:31646ms step_avg:151.42ms step:220/3200 train_loss:4.2210 train_time:31797ms step_avg:151.41ms step:221/3200 train_loss:4.2355 
train_time:31946ms step_avg:151.40ms step:222/3200 train_loss:4.2532 train_time:32098ms step_avg:151.40ms step:223/3200 train_loss:4.1681 train_time:32246ms step_avg:151.39ms step:224/3200 train_loss:4.1261 train_time:32397ms step_avg:151.39ms step:225/3200 train_loss:4.4322 train_time:32547ms step_avg:151.38ms step:226/3200 train_loss:4.0574 train_time:32696ms step_avg:151.37ms step:227/3200 train_loss:4.1316 train_time:32845ms step_avg:151.36ms step:228/3200 train_loss:4.1320 train_time:32997ms step_avg:151.36ms step:229/3200 train_loss:4.2832 train_time:33146ms step_avg:151.35ms step:230/3200 train_loss:4.0642 train_time:33297ms step_avg:151.35ms step:231/3200 train_loss:4.1927 train_time:33447ms step_avg:151.34ms step:232/3200 train_loss:4.0529 train_time:33597ms step_avg:151.34ms step:233/3200 train_loss:4.1100 train_time:33745ms step_avg:151.32ms step:234/3200 train_loss:4.2469 train_time:33896ms step_avg:151.32ms step:235/3200 train_loss:4.1536 train_time:34045ms step_avg:151.31ms step:236/3200 train_loss:4.0498 train_time:34196ms step_avg:151.31ms step:237/3200 train_loss:4.2116 train_time:34345ms step_avg:151.30ms step:238/3200 train_loss:4.2239 train_time:34495ms step_avg:151.29ms step:239/3200 train_loss:4.0821 train_time:34645ms step_avg:151.29ms step:240/3200 train_loss:4.2210 train_time:34796ms step_avg:151.29ms step:241/3200 train_loss:4.2494 train_time:34945ms step_avg:151.28ms step:242/3200 train_loss:4.0997 train_time:35096ms step_avg:151.28ms step:243/3200 train_loss:4.2775 train_time:35245ms step_avg:151.27ms step:244/3200 train_loss:4.1494 train_time:35397ms step_avg:151.27ms step:245/3200 train_loss:4.2050 train_time:35546ms step_avg:151.26ms step:246/3200 train_loss:4.2713 train_time:35696ms step_avg:151.25ms step:247/3200 train_loss:4.1989 train_time:35845ms step_avg:151.24ms step:248/3200 train_loss:4.1392 train_time:35996ms step_avg:151.24ms step:249/3200 train_loss:4.2456 train_time:36146ms step_avg:151.24ms step:250/3200 
train_loss:4.0540 train_time:36297ms step_avg:151.24ms step:250/3200 val_loss:4.1398 train_time:36343ms step_avg:151.43ms step:251/3200 train_loss:4.1043 train_time:36453ms step_avg:151.26ms step:252/3200 train_loss:4.2107 train_time:36606ms step_avg:151.27ms step:253/3200 train_loss:4.2712 train_time:36754ms step_avg:151.25ms step:254/3200 train_loss:4.0626 train_time:36904ms step_avg:151.24ms step:255/3200 train_loss:4.0207 train_time:37051ms step_avg:151.23ms step:256/3200 train_loss:4.1956 train_time:37200ms step_avg:151.22ms step:257/3200 train_loss:4.1041 train_time:37350ms step_avg:151.21ms step:258/3200 train_loss:4.1197 train_time:37504ms step_avg:151.22ms step:259/3200 train_loss:4.0988 train_time:37654ms step_avg:151.22ms step:260/3200 train_loss:4.1499 train_time:37804ms step_avg:151.22ms step:261/3200 train_loss:4.1843 train_time:37953ms step_avg:151.21ms step:262/3200 train_loss:4.1525 train_time:38103ms step_avg:151.20ms step:263/3200 train_loss:4.1090 train_time:38252ms step_avg:151.19ms step:264/3200 train_loss:4.0220 train_time:38402ms step_avg:151.19ms step:265/3200 train_loss:4.1074 train_time:38553ms step_avg:151.19ms step:266/3200 train_loss:3.9882 train_time:38704ms step_avg:151.19ms step:267/3200 train_loss:4.0368 train_time:38854ms step_avg:151.18ms step:268/3200 train_loss:4.0466 train_time:39004ms step_avg:151.18ms step:269/3200 train_loss:4.0714 train_time:39152ms step_avg:151.17ms step:270/3200 train_loss:3.9829 train_time:39302ms step_avg:151.16ms step:271/3200 train_loss:4.2154 train_time:39453ms step_avg:151.16ms step:272/3200 train_loss:4.1052 train_time:39606ms step_avg:151.17ms step:273/3200 train_loss:4.0251 train_time:39754ms step_avg:151.16ms step:274/3200 train_loss:4.0727 train_time:39905ms step_avg:151.15ms step:275/3200 train_loss:4.1579 train_time:40054ms step_avg:151.15ms step:276/3200 train_loss:4.1791 train_time:40204ms step_avg:151.14ms step:277/3200 train_loss:4.3411 train_time:40353ms step_avg:151.14ms step:278/3200 
train_loss:4.1509 train_time:40505ms step_avg:151.14ms step:279/3200 train_loss:4.2007 train_time:40655ms step_avg:151.13ms step:280/3200 train_loss:4.1151 train_time:40805ms step_avg:151.13ms step:281/3200 train_loss:4.2314 train_time:40954ms step_avg:151.12ms step:282/3200 train_loss:4.0724 train_time:41104ms step_avg:151.12ms step:283/3200 train_loss:4.0771 train_time:41254ms step_avg:151.11ms step:284/3200 train_loss:4.0211 train_time:41404ms step_avg:151.11ms step:285/3200 train_loss:4.1638 train_time:41553ms step_avg:151.10ms step:286/3200 train_loss:4.1729 train_time:41705ms step_avg:151.11ms step:287/3200 train_loss:4.2071 train_time:41854ms step_avg:151.10ms step:288/3200 train_loss:4.0336 train_time:42006ms step_avg:151.10ms step:289/3200 train_loss:4.1337 train_time:42155ms step_avg:151.09ms step:290/3200 train_loss:3.9901 train_time:42306ms step_avg:151.09ms step:291/3200 train_loss:3.9821 train_time:42454ms step_avg:151.08ms step:292/3200 train_loss:4.0559 train_time:42605ms step_avg:151.08ms step:293/3200 train_loss:3.9780 train_time:42754ms step_avg:151.07ms step:294/3200 train_loss:4.0264 train_time:42905ms step_avg:151.07ms step:295/3200 train_loss:4.0657 train_time:43055ms step_avg:151.07ms step:296/3200 train_loss:3.9524 train_time:43206ms step_avg:151.07ms step:297/3200 train_loss:3.9741 train_time:43356ms step_avg:151.07ms step:298/3200 train_loss:3.9761 train_time:43507ms step_avg:151.07ms step:299/3200 train_loss:4.0815 train_time:43657ms step_avg:151.06ms step:300/3200 train_loss:3.9478 train_time:43808ms step_avg:151.06ms step:301/3200 train_loss:4.0780 train_time:43958ms step_avg:151.06ms step:302/3200 train_loss:4.0878 train_time:44109ms step_avg:151.06ms step:303/3200 train_loss:4.0379 train_time:44259ms step_avg:151.06ms step:304/3200 train_loss:4.0882 train_time:44409ms step_avg:151.05ms step:305/3200 train_loss:4.0714 train_time:44559ms step_avg:151.05ms step:306/3200 train_loss:4.5516 train_time:44708ms step_avg:151.04ms 
step:307/3200 train_loss:4.0465 train_time:44858ms step_avg:151.04ms step:308/3200 train_loss:3.9472 train_time:45009ms step_avg:151.04ms step:309/3200 train_loss:4.0984 train_time:45160ms step_avg:151.04ms step:310/3200 train_loss:3.9704 train_time:45309ms step_avg:151.03ms step:311/3200 train_loss:4.1855 train_time:45461ms step_avg:151.03ms step:312/3200 train_loss:4.0302 train_time:45611ms step_avg:151.03ms step:313/3200 train_loss:3.9858 train_time:45761ms step_avg:151.03ms step:314/3200 train_loss:4.0661 train_time:45910ms step_avg:151.02ms step:315/3200 train_loss:4.1931 train_time:46061ms step_avg:151.02ms step:316/3200 train_loss:4.0657 train_time:46210ms step_avg:151.01ms step:317/3200 train_loss:3.9031 train_time:46361ms step_avg:151.01ms step:318/3200 train_loss:3.9851 train_time:46510ms step_avg:151.01ms step:319/3200 train_loss:4.0209 train_time:46661ms step_avg:151.01ms step:320/3200 train_loss:3.9942 train_time:46809ms step_avg:151.00ms step:321/3200 train_loss:4.1148 train_time:46959ms step_avg:150.99ms step:322/3200 train_loss:4.0588 train_time:47110ms step_avg:150.99ms step:323/3200 train_loss:4.0279 train_time:47260ms step_avg:150.99ms step:324/3200 train_loss:4.1160 train_time:47410ms step_avg:150.99ms step:325/3200 train_loss:4.0568 train_time:47559ms step_avg:150.98ms step:326/3200 train_loss:4.1284 train_time:47710ms step_avg:150.98ms step:327/3200 train_loss:3.9919 train_time:47861ms step_avg:150.98ms step:328/3200 train_loss:4.4956 train_time:48009ms step_avg:150.97ms step:329/3200 train_loss:4.1721 train_time:48161ms step_avg:150.98ms step:330/3200 train_loss:3.9186 train_time:48311ms step_avg:150.97ms step:331/3200 train_loss:3.8603 train_time:48461ms step_avg:150.97ms step:332/3200 train_loss:4.0784 train_time:48610ms step_avg:150.96ms step:333/3200 train_loss:4.0161 train_time:48761ms step_avg:150.96ms step:334/3200 train_loss:3.9817 train_time:48910ms step_avg:150.96ms step:335/3200 train_loss:3.9462 train_time:49062ms 
step_avg:150.96ms step:336/3200 train_loss:4.1150 train_time:49211ms step_avg:150.96ms step:337/3200 train_loss:4.0643 train_time:49362ms step_avg:150.95ms step:338/3200 train_loss:4.5185 train_time:49511ms step_avg:150.95ms step:339/3200 train_loss:4.0402 train_time:49661ms step_avg:150.95ms step:340/3200 train_loss:3.9844 train_time:49812ms step_avg:150.94ms step:341/3200 train_loss:4.0356 train_time:49963ms step_avg:150.95ms step:342/3200 train_loss:3.9487 train_time:50112ms step_avg:150.94ms step:343/3200 train_loss:3.9186 train_time:50263ms step_avg:150.94ms step:344/3200 train_loss:3.9553 train_time:50411ms step_avg:150.93ms step:345/3200 train_loss:4.0986 train_time:50561ms step_avg:150.93ms step:346/3200 train_loss:3.9379 train_time:50711ms step_avg:150.92ms step:347/3200 train_loss:3.8761 train_time:50861ms step_avg:150.92ms step:348/3200 train_loss:3.9138 train_time:51012ms step_avg:150.92ms step:349/3200 train_loss:3.9643 train_time:51162ms step_avg:150.92ms step:350/3200 train_loss:3.9300 train_time:51311ms step_avg:150.91ms step:351/3200 train_loss:3.6660 train_time:51460ms step_avg:150.91ms step:352/3200 train_loss:3.9232 train_time:51610ms step_avg:150.91ms step:353/3200 train_loss:4.2518 train_time:51759ms step_avg:150.90ms step:354/3200 train_loss:3.7621 train_time:51910ms step_avg:150.90ms step:355/3200 train_loss:4.0270 train_time:52061ms step_avg:150.90ms step:356/3200 train_loss:3.8930 train_time:52213ms step_avg:150.90ms step:357/3200 train_loss:4.0015 train_time:52363ms step_avg:150.90ms step:358/3200 train_loss:3.9193 train_time:52512ms step_avg:150.90ms step:359/3200 train_loss:3.9535 train_time:52661ms step_avg:150.89ms step:360/3200 train_loss:3.9817 train_time:52810ms step_avg:150.88ms step:361/3200 train_loss:3.5615 train_time:52961ms step_avg:150.89ms step:362/3200 train_loss:4.1233 train_time:53111ms step_avg:150.88ms step:363/3200 train_loss:4.0180 train_time:53264ms step_avg:150.89ms step:364/3200 train_loss:3.9405 
train_time:53412ms step_avg:150.88ms step:365/3200 train_loss:3.8475 train_time:53563ms step_avg:150.88ms step:366/3200 train_loss:4.0171 train_time:53712ms step_avg:150.88ms step:367/3200 train_loss:3.9707 train_time:53863ms step_avg:150.88ms step:368/3200 train_loss:3.9626 train_time:54011ms step_avg:150.87ms step:369/3200 train_loss:3.9520 train_time:54163ms step_avg:150.87ms step:370/3200 train_loss:3.8447 train_time:54312ms step_avg:150.87ms step:371/3200 train_loss:3.9878 train_time:54463ms step_avg:150.87ms step:372/3200 train_loss:3.8586 train_time:54611ms step_avg:150.86ms step:373/3200 train_loss:3.8020 train_time:54761ms step_avg:150.86ms step:374/3200 train_loss:4.0194 train_time:54910ms step_avg:150.85ms step:375/3200 train_loss:3.9408 train_time:55061ms step_avg:150.85ms step:375/3200 val_loss:3.9360 train_time:55108ms step_avg:150.98ms step:376/3200 train_loss:3.9145 train_time:55222ms step_avg:150.88ms step:377/3200 train_loss:3.9737 train_time:55373ms step_avg:150.88ms step:378/3200 train_loss:3.8951 train_time:55674ms step_avg:151.29ms step:379/3200 train_loss:3.9485 train_time:55829ms step_avg:151.30ms step:380/3200 train_loss:3.9755 train_time:56146ms step_avg:151.75ms step:381/3200 train_loss:4.0504 train_time:56301ms step_avg:151.75ms step:382/3200 train_loss:3.9456 train_time:56450ms step_avg:151.75ms step:383/3200 train_loss:3.9221 train_time:56599ms step_avg:151.74ms step:384/3200 train_loss:3.8966 train_time:56746ms step_avg:151.73ms step:385/3200 train_loss:3.9721 train_time:56894ms step_avg:151.72ms step:386/3200 train_loss:3.8887 train_time:57045ms step_avg:151.72ms step:387/3200 train_loss:3.9911 train_time:57201ms step_avg:151.73ms step:388/3200 train_loss:4.1758 train_time:57352ms step_avg:151.73ms step:389/3200 train_loss:3.9033 train_time:57502ms step_avg:151.72ms step:390/3200 train_loss:3.8910 train_time:57650ms step_avg:151.71ms step:391/3200 train_loss:3.9910 train_time:57799ms step_avg:151.70ms step:392/3200 train_loss:3.9135 
train_time:57948ms step_avg:151.70ms step:393/3200 train_loss:4.0252 train_time:58100ms step_avg:151.70ms step:394/3200 train_loss:3.8640 train_time:58251ms step_avg:151.70ms step:395/3200 train_loss:3.9940 train_time:58403ms step_avg:151.69ms step:396/3200 train_loss:3.7434 train_time:58553ms step_avg:151.69ms step:397/3200 train_loss:3.9482 train_time:58702ms step_avg:151.68ms step:398/3200 train_loss:3.9794 train_time:58851ms step_avg:151.68ms step:399/3200 train_loss:3.9886 train_time:59002ms step_avg:151.68ms step:400/3200 train_loss:3.8905 train_time:59153ms step_avg:151.67ms step:401/3200 train_loss:3.9418 train_time:59304ms step_avg:151.67ms step:402/3200 train_loss:4.0126 train_time:59455ms step_avg:151.67ms step:403/3200 train_loss:3.9471 train_time:59605ms step_avg:151.67ms step:404/3200 train_loss:4.0579 train_time:59754ms step_avg:151.66ms step:405/3200 train_loss:3.8052 train_time:59902ms step_avg:151.65ms step:406/3200 train_loss:3.9017 train_time:60053ms step_avg:151.65ms step:407/3200 train_loss:4.1919 train_time:60204ms step_avg:151.65ms step:408/3200 train_loss:3.8960 train_time:60356ms step_avg:151.65ms step:409/3200 train_loss:3.9248 train_time:60505ms step_avg:151.64ms step:410/3200 train_loss:3.9695 train_time:60655ms step_avg:151.64ms step:411/3200 train_loss:3.8614 train_time:60803ms step_avg:151.63ms step:412/3200 train_loss:3.8715 train_time:60953ms step_avg:151.63ms step:413/3200 train_loss:4.2941 train_time:61103ms step_avg:151.62ms step:414/3200 train_loss:3.7381 train_time:61255ms step_avg:151.62ms step:415/3200 train_loss:4.1116 train_time:61407ms step_avg:151.62ms step:416/3200 train_loss:3.8635 train_time:61559ms step_avg:151.62ms step:417/3200 train_loss:3.8726 train_time:61707ms step_avg:151.61ms step:418/3200 train_loss:4.0559 train_time:61858ms step_avg:151.61ms step:419/3200 train_loss:3.8026 train_time:62006ms step_avg:151.61ms step:420/3200 train_loss:3.9186 train_time:62159ms step_avg:151.61ms step:421/3200 
train_loss:3.8327 train_time:62309ms step_avg:151.60ms step:422/3200 train_loss:3.7530 train_time:62461ms step_avg:151.60ms step:423/3200 train_loss:3.8876 train_time:62610ms step_avg:151.60ms step:424/3200 train_loss:3.9762 train_time:62760ms step_avg:151.60ms step:425/3200 train_loss:3.7318 train_time:62909ms step_avg:151.59ms step:426/3200 train_loss:3.9143 train_time:63061ms step_avg:151.59ms step:427/3200 train_loss:3.7896 train_time:63209ms step_avg:151.58ms step:428/3200 train_loss:4.0078 train_time:63362ms step_avg:151.58ms step:429/3200 train_loss:3.9263 train_time:63511ms step_avg:151.58ms step:430/3200 train_loss:3.8683 train_time:63662ms step_avg:151.58ms step:431/3200 train_loss:3.8326 train_time:63812ms step_avg:151.57ms step:432/3200 train_loss:3.7428 train_time:63962ms step_avg:151.57ms step:433/3200 train_loss:3.8762 train_time:64112ms step_avg:151.57ms step:434/3200 train_loss:3.9293 train_time:64263ms step_avg:151.56ms step:435/3200 train_loss:3.8761 train_time:64413ms step_avg:151.56ms step:436/3200 train_loss:3.9196 train_time:64564ms step_avg:151.56ms step:437/3200 train_loss:3.9391 train_time:64714ms step_avg:151.56ms step:438/3200 train_loss:3.8251 train_time:64864ms step_avg:151.55ms step:439/3200 train_loss:3.8243 train_time:65014ms step_avg:151.55ms step:440/3200 train_loss:3.8179 train_time:65163ms step_avg:151.54ms step:441/3200 train_loss:3.9944 train_time:65313ms step_avg:151.54ms step:442/3200 train_loss:3.8759 train_time:65465ms step_avg:151.54ms step:443/3200 train_loss:3.8575 train_time:65615ms step_avg:151.54ms step:444/3200 train_loss:3.7524 train_time:65764ms step_avg:151.53ms step:445/3200 train_loss:4.0254 train_time:65914ms step_avg:151.53ms step:446/3200 train_loss:3.9524 train_time:66064ms step_avg:151.52ms step:447/3200 train_loss:3.9426 train_time:66213ms step_avg:151.52ms step:448/3200 train_loss:3.8620 train_time:66364ms step_avg:151.52ms step:449/3200 train_loss:3.9613 train_time:66514ms step_avg:151.51ms 
step:450/3200 train_loss:3.7958 train_time:66664ms step_avg:151.51ms step:451/3200 train_loss:3.8403 train_time:66814ms step_avg:151.51ms step:452/3200 train_loss:3.6953 train_time:66964ms step_avg:151.50ms step:453/3200 train_loss:3.8141 train_time:67114ms step_avg:151.50ms step:454/3200 train_loss:3.7855 train_time:67264ms step_avg:151.49ms step:455/3200 train_loss:3.7488 train_time:67413ms step_avg:151.49ms step:456/3200 train_loss:3.9571 train_time:67564ms step_avg:151.49ms step:457/3200 train_loss:3.8387 train_time:67713ms step_avg:151.48ms step:458/3200 train_loss:3.9034 train_time:67863ms step_avg:151.48ms step:459/3200 train_loss:3.9431 train_time:68013ms step_avg:151.48ms step:460/3200 train_loss:3.7516 train_time:68164ms step_avg:151.48ms step:461/3200 train_loss:3.9081 train_time:68314ms step_avg:151.47ms step:462/3200 train_loss:3.8154 train_time:68464ms step_avg:151.47ms step:463/3200 train_loss:3.8368 train_time:68615ms step_avg:151.47ms step:464/3200 train_loss:3.8876 train_time:68764ms step_avg:151.46ms step:465/3200 train_loss:3.8288 train_time:68915ms step_avg:151.46ms step:466/3200 train_loss:3.8343 train_time:69064ms step_avg:151.46ms step:467/3200 train_loss:3.9196 train_time:69215ms step_avg:151.45ms step:468/3200 train_loss:3.9398 train_time:69365ms step_avg:151.45ms step:469/3200 train_loss:3.9103 train_time:69514ms step_avg:151.45ms step:470/3200 train_loss:3.8050 train_time:69664ms step_avg:151.44ms step:471/3200 train_loss:3.8817 train_time:69814ms step_avg:151.44ms step:472/3200 train_loss:3.9256 train_time:69964ms step_avg:151.44ms step:473/3200 train_loss:3.8871 train_time:70114ms step_avg:151.43ms step:474/3200 train_loss:3.8334 train_time:70264ms step_avg:151.43ms step:475/3200 train_loss:3.6954 train_time:70415ms step_avg:151.43ms step:476/3200 train_loss:4.1339 train_time:70565ms step_avg:151.43ms step:477/3200 train_loss:3.8767 train_time:70715ms step_avg:151.42ms step:478/3200 train_loss:3.7065 train_time:70864ms 
step_avg:151.42ms step:479/3200 train_loss:3.9322 train_time:71015ms step_avg:151.42ms step:480/3200 train_loss:3.8787 train_time:71164ms step_avg:151.41ms step:481/3200 train_loss:4.0376 train_time:71315ms step_avg:151.41ms step:482/3200 train_loss:3.8358 train_time:71465ms step_avg:151.41ms step:483/3200 train_loss:3.6436 train_time:71616ms step_avg:151.41ms step:484/3200 train_loss:3.9201 train_time:71765ms step_avg:151.40ms step:485/3200 train_loss:3.7787 train_time:71915ms step_avg:151.40ms step:486/3200 train_loss:3.7894 train_time:72065ms step_avg:151.40ms step:487/3200 train_loss:3.7043 train_time:72215ms step_avg:151.39ms step:488/3200 train_loss:3.7881 train_time:72364ms step_avg:151.39ms step:489/3200 train_loss:3.9814 train_time:72516ms step_avg:151.39ms step:490/3200 train_loss:3.8298 train_time:72666ms step_avg:151.39ms step:491/3200 train_loss:3.7182 train_time:72815ms step_avg:151.38ms step:492/3200 train_loss:3.7311 train_time:72964ms step_avg:151.38ms step:493/3200 train_loss:3.8469 train_time:73113ms step_avg:151.37ms step:494/3200 train_loss:3.6987 train_time:73264ms step_avg:151.37ms step:495/3200 train_loss:3.8300 train_time:73413ms step_avg:151.37ms step:496/3200 train_loss:3.7750 train_time:73564ms step_avg:151.37ms step:497/3200 train_loss:3.6486 train_time:73715ms step_avg:151.37ms step:498/3200 train_loss:3.8444 train_time:73865ms step_avg:151.36ms step:499/3200 train_loss:3.9198 train_time:74015ms step_avg:151.36ms step:500/3200 train_loss:3.9480 train_time:74165ms step_avg:151.36ms step:500/3200 val_loss:3.8237 train_time:74211ms step_avg:151.45ms step:501/3200 train_loss:3.8664 train_time:74324ms step_avg:151.37ms step:502/3200 train_loss:3.9165 train_time:74475ms step_avg:151.37ms step:503/3200 train_loss:3.8552 train_time:74625ms step_avg:151.37ms step:504/3200 train_loss:3.8965 train_time:74773ms step_avg:151.36ms step:505/3200 train_loss:3.8383 train_time:74922ms step_avg:151.36ms step:506/3200 train_loss:3.9345 train_time:75071ms 
step_avg:151.35ms step:507/3200 train_loss:3.7499 train_time:75221ms step_avg:151.35ms step:508/3200 train_loss:3.8717 train_time:75373ms step_avg:151.35ms step:509/3200 train_loss:3.9529 train_time:75524ms step_avg:151.35ms step:510/3200 train_loss:3.8828 train_time:75676ms step_avg:151.35ms step:511/3200 train_loss:3.7026 train_time:75825ms step_avg:151.35ms step:512/3200 train_loss:3.9005 train_time:75975ms step_avg:151.34ms step:513/3200 train_loss:3.8372 train_time:76123ms step_avg:151.34ms step:514/3200 train_loss:3.7994 train_time:76273ms step_avg:151.33ms step:515/3200 train_loss:3.8838 train_time:76423ms step_avg:151.33ms step:516/3200 train_loss:3.8481 train_time:76575ms step_avg:151.33ms step:517/3200 train_loss:4.2054 train_time:76725ms step_avg:151.33ms step:518/3200 train_loss:3.8071 train_time:76876ms step_avg:151.33ms step:519/3200 train_loss:3.9056 train_time:77025ms step_avg:151.33ms step:520/3200 train_loss:3.7972 train_time:77175ms step_avg:151.32ms step:521/3200 train_loss:3.8117 train_time:77325ms step_avg:151.32ms step:522/3200 train_loss:3.7621 train_time:77476ms step_avg:151.32ms step:523/3200 train_loss:3.7745 train_time:77626ms step_avg:151.32ms step:524/3200 train_loss:4.4101 train_time:77778ms step_avg:151.32ms step:525/3200 train_loss:3.8578 train_time:77927ms step_avg:151.31ms step:526/3200 train_loss:3.7951 train_time:78079ms step_avg:151.32ms step:527/3200 train_loss:3.8136 train_time:78229ms step_avg:151.31ms step:528/3200 train_loss:3.7694 train_time:78380ms step_avg:151.31ms step:529/3200 train_loss:3.7373 train_time:78529ms step_avg:151.31ms step:530/3200 train_loss:3.9631 train_time:78681ms step_avg:151.31ms step:531/3200 train_loss:3.7655 train_time:78831ms step_avg:151.31ms step:532/3200 train_loss:4.0290 train_time:78981ms step_avg:151.31ms step:533/3200 train_loss:3.8459 train_time:79132ms step_avg:151.30ms step:534/3200 train_loss:3.7721 train_time:79282ms step_avg:151.30ms step:535/3200 train_loss:3.7989 
train_time:79432ms step_avg:151.30ms step:536/3200 train_loss:3.7329 train_time:79581ms step_avg:151.30ms step:537/3200 train_loss:3.8647 train_time:79731ms step_avg:151.29ms step:538/3200 train_loss:3.8493 train_time:79883ms step_avg:151.29ms step:539/3200 train_loss:3.7463 train_time:80035ms step_avg:151.29ms step:540/3200 train_loss:4.2439 train_time:80184ms step_avg:151.29ms step:541/3200 train_loss:3.7899 train_time:80335ms step_avg:151.29ms step:542/3200 train_loss:3.8970 train_time:80484ms step_avg:151.28ms step:543/3200 train_loss:3.7227 train_time:80636ms step_avg:151.29ms step:544/3200 train_loss:3.6965 train_time:80785ms step_avg:151.28ms step:545/3200 train_loss:3.7795 train_time:80936ms step_avg:151.28ms step:546/3200 train_loss:3.7046 train_time:81086ms step_avg:151.28ms step:547/3200 train_loss:3.7592 train_time:81238ms step_avg:151.28ms step:548/3200 train_loss:3.7610 train_time:81388ms step_avg:151.28ms step:549/3200 train_loss:3.7464 train_time:81539ms step_avg:151.28ms step:550/3200 train_loss:3.8432 train_time:81688ms step_avg:151.27ms step:551/3200 train_loss:3.7349 train_time:81838ms step_avg:151.27ms step:552/3200 train_loss:3.7399 train_time:81986ms step_avg:151.27ms step:553/3200 train_loss:4.0786 train_time:82141ms step_avg:151.27ms step:554/3200 train_loss:3.8676 train_time:82292ms step_avg:151.27ms step:555/3200 train_loss:3.8259 train_time:82443ms step_avg:151.27ms step:556/3200 train_loss:3.7698 train_time:82593ms step_avg:151.27ms step:557/3200 train_loss:3.8110 train_time:82743ms step_avg:151.27ms step:558/3200 train_loss:3.4597 train_time:82893ms step_avg:151.26ms step:559/3200 train_loss:3.7286 train_time:83042ms step_avg:151.26ms step:560/3200 train_loss:3.7710 train_time:83194ms step_avg:151.26ms step:561/3200 train_loss:3.8220 train_time:83344ms step_avg:151.26ms step:562/3200 train_loss:3.7272 train_time:83496ms step_avg:151.26ms step:563/3200 train_loss:3.6731 train_time:83644ms step_avg:151.26ms step:564/3200 
train_loss:3.8826 train_time:83795ms step_avg:151.25ms step:565/3200 train_loss:3.6878 train_time:83943ms step_avg:151.25ms step:566/3200 train_loss:3.8041 train_time:84095ms step_avg:151.25ms step:567/3200 train_loss:3.7480 train_time:84395ms step_avg:151.52ms step:568/3200 train_loss:3.7138 train_time:84552ms step_avg:151.53ms step:569/3200 train_loss:3.8006 train_time:84702ms step_avg:151.52ms step:570/3200 train_loss:3.7784 train_time:85026ms step_avg:151.83ms step:571/3200 train_loss:3.8062 train_time:85177ms step_avg:151.83ms step:572/3200 train_loss:3.8860 train_time:85326ms step_avg:151.82ms step:573/3200 train_loss:3.8372 train_time:85475ms step_avg:151.82ms step:574/3200 train_loss:3.8486 train_time:85623ms step_avg:151.81ms step:575/3200 train_loss:3.8954 train_time:85773ms step_avg:151.81ms step:576/3200 train_loss:3.8469 train_time:85924ms step_avg:151.81ms step:577/3200 train_loss:3.8772 train_time:86077ms step_avg:151.81ms step:578/3200 train_loss:3.7955 train_time:86227ms step_avg:151.81ms step:579/3200 train_loss:3.7966 train_time:86377ms step_avg:151.80ms step:580/3200 train_loss:3.7803 train_time:86525ms step_avg:151.80ms step:581/3200 train_loss:3.7185 train_time:86674ms step_avg:151.79ms step:582/3200 train_loss:3.7460 train_time:86823ms step_avg:151.79ms step:583/3200 train_loss:3.9759 train_time:86974ms step_avg:151.79ms step:584/3200 train_loss:3.7392 train_time:87125ms step_avg:151.79ms step:585/3200 train_loss:3.7151 train_time:87277ms step_avg:151.79ms step:586/3200 train_loss:3.8971 train_time:87426ms step_avg:151.78ms step:587/3200 train_loss:3.6506 train_time:87577ms step_avg:151.78ms step:588/3200 train_loss:3.7859 train_time:87725ms step_avg:151.77ms step:589/3200 train_loss:3.7636 train_time:87877ms step_avg:151.77ms step:590/3200 train_loss:4.1121 train_time:88027ms step_avg:151.77ms step:591/3200 train_loss:3.9022 train_time:88178ms step_avg:151.77ms step:592/3200 train_loss:3.6382 train_time:88329ms step_avg:151.77ms 
step:593/3200 train_loss:3.6583 train_time:88481ms step_avg:151.77ms step:594/3200 train_loss:3.6364 train_time:88630ms step_avg:151.76ms step:595/3200 train_loss:3.6852 train_time:88780ms step_avg:151.76ms step:596/3200 train_loss:4.0568 train_time:88930ms step_avg:151.76ms step:597/3200 train_loss:3.7739 train_time:89081ms step_avg:151.76ms step:598/3200 train_loss:3.6997 train_time:89230ms step_avg:151.75ms step:599/3200 train_loss:3.7810 train_time:89381ms step_avg:151.75ms step:600/3200 train_loss:3.5967 train_time:89532ms step_avg:151.75ms step:601/3200 train_loss:3.7213 train_time:89682ms step_avg:151.75ms step:602/3200 train_loss:3.7507 train_time:89833ms step_avg:151.74ms step:603/3200 train_loss:3.7767 train_time:89983ms step_avg:151.74ms step:604/3200 train_loss:3.9037 train_time:90134ms step_avg:151.74ms step:605/3200 train_loss:3.7482 train_time:90283ms step_avg:151.74ms step:606/3200 train_loss:3.7406 train_time:90434ms step_avg:151.73ms step:607/3200 train_loss:3.6882 train_time:90582ms step_avg:151.73ms step:608/3200 train_loss:3.9442 train_time:90732ms step_avg:151.73ms step:609/3200 train_loss:3.7624 train_time:90883ms step_avg:151.73ms step:610/3200 train_loss:3.7381 train_time:91034ms step_avg:151.72ms step:611/3200 train_loss:3.8312 train_time:91184ms step_avg:151.72ms step:612/3200 train_loss:3.7418 train_time:91335ms step_avg:151.72ms step:613/3200 train_loss:3.7262 train_time:91484ms step_avg:151.71ms step:614/3200 train_loss:3.8881 train_time:91636ms step_avg:151.71ms step:615/3200 train_loss:3.8398 train_time:91784ms step_avg:151.71ms step:616/3200 train_loss:3.8170 train_time:91936ms step_avg:151.71ms step:617/3200 train_loss:3.7383 train_time:92084ms step_avg:151.70ms step:618/3200 train_loss:3.6934 train_time:92235ms step_avg:151.70ms step:619/3200 train_loss:3.8039 train_time:92385ms step_avg:151.70ms step:620/3200 train_loss:3.6963 train_time:92536ms step_avg:151.70ms step:621/3200 train_loss:3.7200 train_time:92685ms 
step_avg:151.69ms step:622/3200 train_loss:4.0243 train_time:92835ms step_avg:151.69ms step:623/3200 train_loss:3.7101 train_time:92984ms step_avg:151.69ms step:624/3200 train_loss:3.7360 train_time:93135ms step_avg:151.69ms step:625/3200 train_loss:3.8188 train_time:93284ms step_avg:151.68ms step:625/3200 val_loss:3.7495 train_time:93332ms step_avg:151.76ms step:626/3200 train_loss:3.8419 train_time:93442ms step_avg:151.69ms step:627/3200 train_loss:3.8704 train_time:93594ms step_avg:151.69ms step:628/3200 train_loss:3.8510 train_time:93744ms step_avg:151.69ms step:629/3200 train_loss:3.8891 train_time:93894ms step_avg:151.69ms step:630/3200 train_loss:3.7141 train_time:94042ms step_avg:151.68ms step:631/3200 train_loss:3.8428 train_time:94191ms step_avg:151.68ms step:632/3200 train_loss:3.8691 train_time:94340ms step_avg:151.67ms step:633/3200 train_loss:3.7718 train_time:94495ms step_avg:151.68ms step:634/3200 train_loss:3.7102 train_time:94645ms step_avg:151.67ms step:635/3200 train_loss:3.8094 train_time:94797ms step_avg:151.67ms step:636/3200 train_loss:4.0707 train_time:94945ms step_avg:151.67ms step:637/3200 train_loss:3.6580 train_time:95094ms step_avg:151.67ms step:638/3200 train_loss:3.4814 train_time:95243ms step_avg:151.66ms step:639/3200 train_loss:3.7066 train_time:95395ms step_avg:151.66ms step:640/3200 train_loss:3.7444 train_time:95545ms step_avg:151.66ms step:641/3200 train_loss:3.6918 train_time:95697ms step_avg:151.66ms step:642/3200 train_loss:3.7013 train_time:95847ms step_avg:151.66ms step:643/3200 train_loss:3.7405 train_time:95997ms step_avg:151.65ms step:644/3200 train_loss:3.7332 train_time:96145ms step_avg:151.65ms step:645/3200 train_loss:3.6788 train_time:96296ms step_avg:151.65ms step:646/3200 train_loss:3.8949 train_time:96445ms step_avg:151.64ms step:647/3200 train_loss:3.7966 train_time:96596ms step_avg:151.64ms step:648/3200 train_loss:3.7926 train_time:96747ms step_avg:151.64ms step:649/3200 train_loss:3.8239 train_time:96898ms 
step_avg:151.64ms step:650/3200 train_loss:3.8880 train_time:97048ms step_avg:151.64ms step:651/3200 train_loss:3.7504 train_time:97197ms step_avg:151.63ms step:652/3200 train_loss:3.8815 train_time:97346ms step_avg:151.63ms step:653/3200 train_loss:3.7035 train_time:97497ms step_avg:151.63ms step:654/3200 train_loss:3.7865 train_time:97646ms step_avg:151.62ms step:655/3200 train_loss:3.5513 train_time:97797ms step_avg:151.62ms step:656/3200 train_loss:3.6934 train_time:97947ms step_avg:151.62ms step:657/3200 train_loss:3.7068 train_time:98098ms step_avg:151.62ms step:658/3200 train_loss:3.6347 train_time:98248ms step_avg:151.62ms step:659/3200 train_loss:3.8072 train_time:98397ms step_avg:151.61ms step:660/3200 train_loss:3.7084 train_time:98546ms step_avg:151.61ms step:661/3200 train_loss:3.8032 train_time:98697ms step_avg:151.61ms step:662/3200 train_loss:3.8731 train_time:98846ms step_avg:151.60ms step:663/3200 train_loss:3.7920 train_time:98997ms step_avg:151.60ms step:664/3200 train_loss:3.6669 train_time:99147ms step_avg:151.60ms step:665/3200 train_loss:3.7466 train_time:99297ms step_avg:151.60ms step:666/3200 train_loss:3.6221 train_time:99447ms step_avg:151.60ms step:667/3200 train_loss:3.9084 train_time:99597ms step_avg:151.59ms step:668/3200 train_loss:3.7380 train_time:99747ms step_avg:151.59ms step:669/3200 train_loss:3.7516 train_time:99897ms step_avg:151.59ms step:670/3200 train_loss:3.6073 train_time:100046ms step_avg:151.59ms step:671/3200 train_loss:3.7200 train_time:100197ms step_avg:151.58ms step:672/3200 train_loss:3.6807 train_time:100346ms step_avg:151.58ms step:673/3200 train_loss:3.6978 train_time:100497ms step_avg:151.58ms step:674/3200 train_loss:3.9767 train_time:100646ms step_avg:151.57ms step:675/3200 train_loss:3.7650 train_time:100797ms step_avg:151.57ms step:676/3200 train_loss:3.8346 train_time:100946ms step_avg:151.57ms step:677/3200 train_loss:3.6189 train_time:101097ms step_avg:151.57ms step:678/3200 train_loss:3.7223 
train_time:101248ms step_avg:151.57ms step:679/3200 train_loss:3.6706 train_time:101397ms step_avg:151.57ms step:680/3200 train_loss:3.8076 train_time:101548ms step_avg:151.56ms step:681/3200 train_loss:3.7060 train_time:101697ms step_avg:151.56ms step:682/3200 train_loss:3.7354 train_time:101847ms step_avg:151.56ms step:683/3200 train_loss:3.8172 train_time:101997ms step_avg:151.56ms step:684/3200 train_loss:3.8559 train_time:102147ms step_avg:151.55ms step:685/3200 train_loss:3.7612 train_time:102297ms step_avg:151.55ms step:686/3200 train_loss:3.8255 train_time:102446ms step_avg:151.55ms step:687/3200 train_loss:3.7553 train_time:102598ms step_avg:151.55ms step:688/3200 train_loss:3.7975 train_time:102749ms step_avg:151.55ms step:689/3200 train_loss:3.3875 train_time:102899ms step_avg:151.54ms step:690/3200 train_loss:3.5394 train_time:103049ms step_avg:151.54ms step:691/3200 train_loss:3.6751 train_time:103199ms step_avg:151.54ms step:692/3200 train_loss:3.5545 train_time:103349ms step_avg:151.54ms step:693/3200 train_loss:3.7634 train_time:103498ms step_avg:151.53ms step:694/3200 train_loss:3.7812 train_time:103649ms step_avg:151.53ms step:695/3200 train_loss:3.6687 train_time:103799ms step_avg:151.53ms step:696/3200 train_loss:3.6679 train_time:103949ms step_avg:151.53ms step:697/3200 train_loss:3.9818 train_time:104098ms step_avg:151.53ms step:698/3200 train_loss:3.7248 train_time:104248ms step_avg:151.52ms step:699/3200 train_loss:3.7673 train_time:104398ms step_avg:151.52ms step:700/3200 train_loss:3.9325 train_time:104548ms step_avg:151.52ms step:701/3200 train_loss:3.6965 train_time:104698ms step_avg:151.52ms step:702/3200 train_loss:3.6624 train_time:104847ms step_avg:151.51ms step:703/3200 train_loss:3.6453 train_time:104997ms step_avg:151.51ms step:704/3200 train_loss:3.6094 train_time:105147ms step_avg:151.51ms step:705/3200 train_loss:3.6882 train_time:105297ms step_avg:151.51ms step:706/3200 train_loss:3.6836 train_time:105447ms step_avg:151.50ms 
step:707/3200 train_loss:3.6987 train_time:105597ms step_avg:151.50ms step:708/3200 train_loss:3.7709 train_time:105747ms step_avg:151.50ms step:709/3200 train_loss:3.7187 train_time:105897ms step_avg:151.50ms step:710/3200 train_loss:3.6972 train_time:106048ms step_avg:151.50ms step:711/3200 train_loss:3.6658 train_time:106198ms step_avg:151.50ms step:712/3200 train_loss:3.7111 train_time:106349ms step_avg:151.49ms step:713/3200 train_loss:3.7714 train_time:106498ms step_avg:151.49ms step:714/3200 train_loss:3.7819 train_time:106648ms step_avg:151.49ms step:715/3200 train_loss:3.6902 train_time:106798ms step_avg:151.49ms step:716/3200 train_loss:3.6889 train_time:106948ms step_avg:151.48ms step:717/3200 train_loss:3.7156 train_time:107098ms step_avg:151.48ms step:718/3200 train_loss:3.8603 train_time:107248ms step_avg:151.48ms step:719/3200 train_loss:3.7161 train_time:107398ms step_avg:151.48ms step:720/3200 train_loss:3.7954 train_time:107548ms step_avg:151.48ms step:721/3200 train_loss:3.9574 train_time:107697ms step_avg:151.47ms step:722/3200 train_loss:3.5869 train_time:107848ms step_avg:151.47ms step:723/3200 train_loss:3.8500 train_time:107998ms step_avg:151.47ms step:724/3200 train_loss:3.9053 train_time:108147ms step_avg:151.47ms step:725/3200 train_loss:3.6890 train_time:108297ms step_avg:151.46ms step:726/3200 train_loss:3.7700 train_time:108447ms step_avg:151.46ms step:727/3200 train_loss:3.6629 train_time:108597ms step_avg:151.46ms step:728/3200 train_loss:3.6903 train_time:108747ms step_avg:151.46ms step:729/3200 train_loss:3.8630 train_time:108897ms step_avg:151.46ms step:730/3200 train_loss:3.8066 train_time:109048ms step_avg:151.46ms step:731/3200 train_loss:3.7986 train_time:109197ms step_avg:151.45ms step:732/3200 train_loss:3.6845 train_time:109348ms step_avg:151.45ms step:733/3200 train_loss:3.7169 train_time:109498ms step_avg:151.45ms step:734/3200 train_loss:3.9462 train_time:109648ms step_avg:151.45ms step:735/3200 train_loss:3.6876 
train_time:109798ms step_avg:151.45ms step:736/3200 train_loss:3.7525 train_time:109948ms step_avg:151.44ms step:737/3200 train_loss:3.8720 train_time:110098ms step_avg:151.44ms step:738/3200 train_loss:3.7851 train_time:110250ms step_avg:151.44ms step:739/3200 train_loss:3.7274 train_time:110399ms step_avg:151.44ms step:740/3200 train_loss:3.6232 train_time:110551ms step_avg:151.44ms step:741/3200 train_loss:4.2636 train_time:110700ms step_avg:151.44ms step:742/3200 train_loss:3.6276 train_time:110851ms step_avg:151.44ms step:743/3200 train_loss:3.6931 train_time:111000ms step_avg:151.43ms step:744/3200 train_loss:3.7084 train_time:111152ms step_avg:151.43ms step:745/3200 train_loss:3.7686 train_time:111301ms step_avg:151.43ms step:746/3200 train_loss:3.7318 train_time:111452ms step_avg:151.43ms step:747/3200 train_loss:3.7270 train_time:111602ms step_avg:151.43ms step:748/3200 train_loss:3.7570 train_time:111754ms step_avg:151.43ms step:749/3200 train_loss:3.6840 train_time:111903ms step_avg:151.42ms step:750/3200 train_loss:3.6867 train_time:112056ms step_avg:151.43ms step:750/3200 val_loss:3.6970 train_time:112102ms step_avg:151.49ms step:751/3200 train_loss:3.7222 train_time:112212ms step_avg:151.43ms step:752/3200 train_loss:3.6893 train_time:112363ms step_avg:151.43ms step:753/3200 train_loss:3.7286 train_time:112512ms step_avg:151.43ms step:754/3200 train_loss:3.7507 train_time:112660ms step_avg:151.43ms step:755/3200 train_loss:3.7103 train_time:112808ms step_avg:151.42ms step:756/3200 train_loss:3.7861 train_time:113106ms step_avg:151.62ms step:757/3200 train_loss:3.6189 train_time:113263ms step_avg:151.62ms step:758/3200 train_loss:3.8532 train_time:113412ms step_avg:151.62ms step:759/3200 train_loss:3.7745 train_time:113561ms step_avg:151.62ms step:760/3200 train_loss:3.7105 train_time:113882ms step_avg:151.84ms step:761/3200 train_loss:3.8175 train_time:114032ms step_avg:151.84ms step:762/3200 train_loss:3.5288 train_time:114180ms step_avg:151.83ms 
step:763/3200 train_loss:3.6750 train_time:114329ms step_avg:151.83ms step:764/3200 train_loss:3.7891 train_time:114479ms step_avg:151.83ms step:765/3200 train_loss:3.4494 train_time:114627ms step_avg:151.82ms step:766/3200 train_loss:3.8683 train_time:114778ms step_avg:151.82ms step:767/3200 train_loss:3.7140 train_time:114931ms step_avg:151.82ms step:768/3200 train_loss:3.6845 train_time:115083ms step_avg:151.82ms step:769/3200 train_loss:3.7004 train_time:115234ms step_avg:151.82ms step:770/3200 train_loss:3.7226 train_time:115383ms step_avg:151.82ms step:771/3200 train_loss:3.7771 train_time:115533ms step_avg:151.82ms step:772/3200 train_loss:4.0068 train_time:115682ms step_avg:151.81ms step:773/3200 train_loss:3.5936 train_time:115832ms step_avg:151.81ms step:774/3200 train_loss:3.7745 train_time:115983ms step_avg:151.81ms step:775/3200 train_loss:3.7639 train_time:116134ms step_avg:151.81ms step:776/3200 train_loss:3.7326 train_time:116283ms step_avg:151.81ms step:777/3200 train_loss:3.5330 train_time:116433ms step_avg:151.80ms step:778/3200 train_loss:3.5351 train_time:116583ms step_avg:151.80ms step:779/3200 train_loss:3.6113 train_time:116734ms step_avg:151.80ms step:780/3200 train_loss:3.6949 train_time:116883ms step_avg:151.80ms step:781/3200 train_loss:3.7277 train_time:117035ms step_avg:151.80ms step:782/3200 train_loss:3.7899 train_time:117186ms step_avg:151.79ms step:783/3200 train_loss:3.7059 train_time:117336ms step_avg:151.79ms step:784/3200 train_loss:3.6953 train_time:117485ms step_avg:151.79ms step:785/3200 train_loss:3.7031 train_time:117635ms step_avg:151.79ms step:786/3200 train_loss:3.6784 train_time:117784ms step_avg:151.78ms step:787/3200 train_loss:3.5781 train_time:117934ms step_avg:151.78ms step:788/3200 train_loss:3.8421 train_time:118084ms step_avg:151.78ms step:789/3200 train_loss:3.6262 train_time:118236ms step_avg:151.78ms step:790/3200 train_loss:3.6863 train_time:118385ms step_avg:151.78ms step:791/3200 train_loss:3.7555 
train_time:118536ms step_avg:151.77ms step:792/3200 train_loss:3.8822 train_time:118686ms step_avg:151.77ms step:793/3200 train_loss:3.8906 train_time:118836ms step_avg:151.77ms step:794/3200 train_loss:3.6022 train_time:118984ms step_avg:151.77ms step:795/3200 train_loss:3.7258 train_time:119137ms step_avg:151.77ms step:796/3200 train_loss:3.7843 train_time:119288ms step_avg:151.77ms step:797/3200 train_loss:3.9090 train_time:119438ms step_avg:151.76ms step:798/3200 train_loss:3.6463 train_time:119588ms step_avg:151.76ms step:799/3200 train_loss:3.7899 train_time:119738ms step_avg:151.76ms step:800/3200 train_loss:3.6814 train_time:119887ms step_avg:151.76ms step:801/3200 train_loss:3.6611 train_time:120039ms step_avg:151.76ms step:802/3200 train_loss:3.7556 train_time:120189ms step_avg:151.75ms step:803/3200 train_loss:3.6225 train_time:120341ms step_avg:151.75ms step:804/3200 train_loss:3.6515 train_time:120491ms step_avg:151.75ms step:805/3200 train_loss:3.7577 train_time:120641ms step_avg:151.75ms step:806/3200 train_loss:3.6576 train_time:120790ms step_avg:151.75ms step:807/3200 train_loss:3.6780 train_time:120941ms step_avg:151.75ms step:808/3200 train_loss:3.7717 train_time:121092ms step_avg:151.74ms step:809/3200 train_loss:3.6894 train_time:121242ms step_avg:151.74ms step:810/3200 train_loss:3.6165 train_time:121392ms step_avg:151.74ms step:811/3200 train_loss:3.6944 train_time:121542ms step_avg:151.74ms step:812/3200 train_loss:3.7181 train_time:121692ms step_avg:151.74ms step:813/3200 train_loss:3.7211 train_time:121842ms step_avg:151.73ms step:814/3200 train_loss:3.7561 train_time:121992ms step_avg:151.73ms step:815/3200 train_loss:3.6975 train_time:122142ms step_avg:151.73ms step:816/3200 train_loss:3.6839 train_time:122294ms step_avg:151.73ms step:817/3200 train_loss:3.7989 train_time:122443ms step_avg:151.73ms step:818/3200 train_loss:3.8844 train_time:122594ms step_avg:151.73ms step:819/3200 train_loss:3.6520 train_time:122744ms step_avg:151.72ms 
step:820/3200 train_loss:3.8463 train_time:122894ms step_avg:151.72ms step:821/3200 train_loss:3.6397 train_time:123043ms step_avg:151.72ms step:822/3200 train_loss:3.6790 train_time:123194ms step_avg:151.72ms step:823/3200 train_loss:3.8032 train_time:123344ms step_avg:151.71ms step:824/3200 train_loss:3.7126 train_time:123495ms step_avg:151.71ms step:825/3200 train_loss:3.6379 train_time:123644ms step_avg:151.71ms step:826/3200 train_loss:3.7402 train_time:123796ms step_avg:151.71ms step:827/3200 train_loss:3.6333 train_time:123946ms step_avg:151.71ms step:828/3200 train_loss:3.8600 train_time:124098ms step_avg:151.71ms step:829/3200 train_loss:3.7463 train_time:124247ms step_avg:151.71ms step:830/3200 train_loss:3.7953 train_time:124398ms step_avg:151.71ms step:831/3200 train_loss:3.6664 train_time:124549ms step_avg:151.70ms step:832/3200 train_loss:3.7122 train_time:124700ms step_avg:151.70ms step:833/3200 train_loss:3.6452 train_time:124852ms step_avg:151.70ms step:834/3200 train_loss:3.7711 train_time:125001ms step_avg:151.70ms step:835/3200 train_loss:3.6064 train_time:125152ms step_avg:151.70ms step:836/3200 train_loss:3.5866 train_time:125302ms step_avg:151.70ms step:837/3200 train_loss:3.8421 train_time:125452ms step_avg:151.70ms step:838/3200 train_loss:3.5441 train_time:125602ms step_avg:151.69ms step:839/3200 train_loss:3.7221 train_time:125753ms step_avg:151.69ms step:840/3200 train_loss:3.5535 train_time:125902ms step_avg:151.69ms step:841/3200 train_loss:3.5979 train_time:126053ms step_avg:151.69ms step:842/3200 train_loss:3.6909 train_time:126203ms step_avg:151.69ms step:843/3200 train_loss:3.7062 train_time:126355ms step_avg:151.69ms step:844/3200 train_loss:3.7048 train_time:126504ms step_avg:151.68ms step:845/3200 train_loss:3.5589 train_time:126654ms step_avg:151.68ms step:846/3200 train_loss:3.7998 train_time:126804ms step_avg:151.68ms step:847/3200 train_loss:3.6614 train_time:126954ms step_avg:151.68ms step:848/3200 train_loss:3.6183 
train_time:127103ms step_avg:151.67ms step:849/3200 train_loss:3.7603 train_time:127254ms step_avg:151.67ms step:850/3200 train_loss:3.6273 train_time:127404ms step_avg:151.67ms step:851/3200 train_loss:3.5723 train_time:127557ms step_avg:151.67ms step:852/3200 train_loss:3.8664 train_time:127707ms step_avg:151.67ms step:853/3200 train_loss:3.5777 train_time:127857ms step_avg:151.67ms step:854/3200 train_loss:3.6910 train_time:128006ms step_avg:151.67ms step:855/3200 train_loss:3.7719 train_time:128156ms step_avg:151.66ms step:856/3200 train_loss:3.6487 train_time:128305ms step_avg:151.66ms step:857/3200 train_loss:3.6741 train_time:128457ms step_avg:151.66ms step:858/3200 train_loss:3.7308 train_time:128606ms step_avg:151.66ms step:859/3200 train_loss:3.6140 train_time:128757ms step_avg:151.66ms step:860/3200 train_loss:3.6900 train_time:128907ms step_avg:151.66ms step:861/3200 train_loss:3.7204 train_time:129058ms step_avg:151.65ms step:862/3200 train_loss:3.7617 train_time:129207ms step_avg:151.65ms step:863/3200 train_loss:3.7240 train_time:129359ms step_avg:151.65ms step:864/3200 train_loss:3.6998 train_time:129508ms step_avg:151.65ms step:865/3200 train_loss:3.5219 train_time:129661ms step_avg:151.65ms step:866/3200 train_loss:3.7199 train_time:129810ms step_avg:151.65ms step:867/3200 train_loss:3.9898 train_time:129961ms step_avg:151.65ms step:868/3200 train_loss:3.5814 train_time:130110ms step_avg:151.64ms step:869/3200 train_loss:3.7589 train_time:130261ms step_avg:151.64ms step:870/3200 train_loss:3.7404 train_time:130411ms step_avg:151.64ms step:871/3200 train_loss:3.5777 train_time:130561ms step_avg:151.64ms step:872/3200 train_loss:3.5569 train_time:130711ms step_avg:151.64ms step:873/3200 train_loss:3.7847 train_time:130862ms step_avg:151.64ms step:874/3200 train_loss:3.5830 train_time:131012ms step_avg:151.63ms step:875/3200 train_loss:3.3188 train_time:131163ms step_avg:151.63ms step:875/3200 val_loss:3.6529 train_time:131208ms step_avg:151.69ms 
step:876/3200 train_loss:3.7703 train_time:131319ms step_avg:151.64ms step:877/3200 train_loss:3.5785 train_time:131473ms step_avg:151.64ms step:878/3200 train_loss:3.7476 train_time:131623ms step_avg:151.64ms step:879/3200 train_loss:3.6091 train_time:131770ms step_avg:151.63ms step:880/3200 train_loss:3.7861 train_time:131920ms step_avg:151.63ms step:881/3200 train_loss:3.4512 train_time:132068ms step_avg:151.63ms step:882/3200 train_loss:3.6203 train_time:132219ms step_avg:151.63ms step:883/3200 train_loss:3.8199 train_time:132373ms step_avg:151.63ms step:884/3200 train_loss:3.9745 train_time:132525ms step_avg:151.63ms step:885/3200 train_loss:3.6951 train_time:132673ms step_avg:151.63ms step:886/3200 train_loss:3.6194 train_time:132823ms step_avg:151.62ms step:887/3200 train_loss:3.7040 train_time:132972ms step_avg:151.62ms step:888/3200 train_loss:4.2091 train_time:133122ms step_avg:151.62ms step:889/3200 train_loss:3.9665 train_time:133271ms step_avg:151.62ms step:890/3200 train_loss:3.6493 train_time:133422ms step_avg:151.62ms step:891/3200 train_loss:3.6628 train_time:133572ms step_avg:151.61ms step:892/3200 train_loss:3.4935 train_time:133723ms step_avg:151.61ms step:893/3200 train_loss:3.8443 train_time:133872ms step_avg:151.61ms step:894/3200 train_loss:3.5600 train_time:134022ms step_avg:151.61ms step:895/3200 train_loss:3.8153 train_time:134171ms step_avg:151.61ms step:896/3200 train_loss:3.8205 train_time:134322ms step_avg:151.60ms step:897/3200 train_loss:3.6220 train_time:134471ms step_avg:151.60ms step:898/3200 train_loss:3.6669 train_time:134624ms step_avg:151.60ms step:899/3200 train_loss:3.7208 train_time:134773ms step_avg:151.60ms step:900/3200 train_loss:3.6054 train_time:134924ms step_avg:151.60ms step:901/3200 train_loss:3.5454 train_time:135073ms step_avg:151.60ms step:902/3200 train_loss:3.7540 train_time:135224ms step_avg:151.60ms step:903/3200 train_loss:3.7601 train_time:135374ms step_avg:151.59ms step:904/3200 train_loss:3.6616 
train_time:135525ms step_avg:151.59ms step:905/3200 train_loss:3.6330 train_time:135675ms step_avg:151.59ms step:906/3200 train_loss:3.6216 train_time:135828ms step_avg:151.59ms step:907/3200 train_loss:3.8521 train_time:135979ms step_avg:151.59ms step:908/3200 train_loss:3.6389 train_time:136129ms step_avg:151.59ms step:909/3200 train_loss:3.6811 train_time:136279ms step_avg:151.59ms step:910/3200 train_loss:3.5922 train_time:136429ms step_avg:151.59ms step:911/3200 train_loss:3.6736 train_time:136579ms step_avg:151.59ms step:912/3200 train_loss:3.7506 train_time:136729ms step_avg:151.58ms step:913/3200 train_loss:3.7351 train_time:136881ms step_avg:151.58ms step:914/3200 train_loss:3.6147 train_time:137031ms step_avg:151.58ms step:915/3200 train_loss:3.8676 train_time:137182ms step_avg:151.58ms step:916/3200 train_loss:3.6609 train_time:137332ms step_avg:151.58ms step:917/3200 train_loss:3.7514 train_time:137482ms step_avg:151.58ms step:918/3200 train_loss:3.7274 train_time:137631ms step_avg:151.58ms step:919/3200 train_loss:4.9448 train_time:137782ms step_avg:151.58ms step:920/3200 train_loss:3.6431 train_time:137932ms step_avg:151.57ms step:921/3200 train_loss:3.7047 train_time:138084ms step_avg:151.57ms step:922/3200 train_loss:3.6603 train_time:138233ms step_avg:151.57ms step:923/3200 train_loss:3.7087 train_time:138384ms step_avg:151.57ms step:924/3200 train_loss:3.7291 train_time:138533ms step_avg:151.57ms step:925/3200 train_loss:3.8064 train_time:138685ms step_avg:151.57ms step:926/3200 train_loss:3.7842 train_time:138834ms step_avg:151.57ms step:927/3200 train_loss:3.6866 train_time:138986ms step_avg:151.57ms step:928/3200 train_loss:3.6730 train_time:139135ms step_avg:151.56ms step:929/3200 train_loss:3.9010 train_time:139287ms step_avg:151.56ms step:930/3200 train_loss:3.7404 train_time:139436ms step_avg:151.56ms step:931/3200 train_loss:3.5326 train_time:139587ms step_avg:151.56ms step:932/3200 train_loss:3.6194 train_time:139736ms step_avg:151.56ms 
step:933/3200 train_loss:3.8026 train_time:139887ms step_avg:151.56ms step:934/3200 train_loss:3.5215 train_time:140038ms step_avg:151.56ms step:935/3200 train_loss:3.7034 train_time:140190ms step_avg:151.56ms step:936/3200 train_loss:3.5791 train_time:140340ms step_avg:151.55ms step:937/3200 train_loss:3.6425 train_time:140490ms step_avg:151.55ms step:938/3200 train_loss:3.7361 train_time:140641ms step_avg:151.55ms step:939/3200 train_loss:3.6762 train_time:140790ms step_avg:151.55ms step:940/3200 train_loss:3.8259 train_time:140941ms step_avg:151.55ms step:941/3200 train_loss:3.6104 train_time:141091ms step_avg:151.55ms step:942/3200 train_loss:3.6745 train_time:141242ms step_avg:151.55ms step:943/3200 train_loss:3.4787 train_time:141391ms step_avg:151.54ms step:944/3200 train_loss:3.8298 train_time:141541ms step_avg:151.54ms step:945/3200 train_loss:3.5366 train_time:141838ms step_avg:151.70ms step:946/3200 train_loss:3.5589 train_time:141995ms step_avg:151.70ms step:947/3200 train_loss:5.1850 train_time:142143ms step_avg:151.70ms step:948/3200 train_loss:3.7345 train_time:142292ms step_avg:151.70ms step:949/3200 train_loss:3.6216 train_time:142441ms step_avg:151.69ms step:950/3200 train_loss:3.5206 train_time:142761ms step_avg:151.87ms step:951/3200 train_loss:3.5756 train_time:142907ms step_avg:151.87ms step:952/3200 train_loss:3.5409 train_time:143057ms step_avg:151.87ms step:953/3200 train_loss:3.6052 train_time:143207ms step_avg:151.86ms step:954/3200 train_loss:3.6840 train_time:143355ms step_avg:151.86ms step:955/3200 train_loss:3.5678 train_time:143504ms step_avg:151.86ms step:956/3200 train_loss:3.6022 train_time:143654ms step_avg:151.85ms step:957/3200 train_loss:3.5724 train_time:143807ms step_avg:151.86ms step:958/3200 train_loss:3.6315 train_time:143960ms step_avg:151.86ms step:959/3200 train_loss:3.6231 train_time:144110ms step_avg:151.85ms step:960/3200 train_loss:3.6406 train_time:144259ms step_avg:151.85ms step:961/3200 train_loss:3.5339 
train_time:144408ms step_avg:151.85ms step:962/3200 train_loss:3.7798 train_time:144556ms step_avg:151.84ms step:963/3200 train_loss:3.7319 train_time:144709ms step_avg:151.85ms step:964/3200 train_loss:3.6181 train_time:144861ms step_avg:151.85ms step:965/3200 train_loss:3.5799 train_time:145011ms step_avg:151.84ms step:966/3200 train_loss:3.6148 train_time:145161ms step_avg:151.84ms step:967/3200 train_loss:3.8345 train_time:145310ms step_avg:151.84ms step:968/3200 train_loss:3.6592 train_time:145459ms step_avg:151.84ms step:969/3200 train_loss:3.6469 train_time:145608ms step_avg:151.83ms step:970/3200 train_loss:3.7068 train_time:145759ms step_avg:151.83ms step:971/3200 train_loss:3.5209 train_time:145910ms step_avg:151.83ms step:972/3200 train_loss:3.6692 train_time:146063ms step_avg:151.83ms step:973/3200 train_loss:3.6303 train_time:146212ms step_avg:151.83ms step:974/3200 train_loss:3.6684 train_time:146363ms step_avg:151.83ms step:975/3200 train_loss:3.7418 train_time:146511ms step_avg:151.83ms step:976/3200 train_loss:3.6148 train_time:146662ms step_avg:151.82ms step:977/3200 train_loss:3.8148 train_time:146811ms step_avg:151.82ms step:978/3200 train_loss:3.7023 train_time:146964ms step_avg:151.82ms step:979/3200 train_loss:3.5291 train_time:147114ms step_avg:151.82ms step:980/3200 train_loss:3.8111 train_time:147266ms step_avg:151.82ms step:981/3200 train_loss:3.5454 train_time:147413ms step_avg:151.82ms step:982/3200 train_loss:3.7168 train_time:147562ms step_avg:151.81ms step:983/3200 train_loss:3.6896 train_time:147711ms step_avg:151.81ms step:984/3200 train_loss:3.6903 train_time:147863ms step_avg:151.81ms step:985/3200 train_loss:3.6379 train_time:148013ms step_avg:151.81ms step:986/3200 train_loss:3.7249 train_time:148165ms step_avg:151.81ms step:987/3200 train_loss:3.5462 train_time:148315ms step_avg:151.81ms step:988/3200 train_loss:3.6188 train_time:148466ms step_avg:151.81ms step:989/3200 train_loss:3.6311 train_time:148616ms step_avg:151.80ms 
step:990/3200 train_loss:3.5610 train_time:148767ms step_avg:151.80ms step:991/3200 train_loss:3.7861 train_time:148917ms step_avg:151.80ms step:992/3200 train_loss:3.5982 train_time:149069ms step_avg:151.80ms step:993/3200 train_loss:3.5712 train_time:149220ms step_avg:151.80ms step:994/3200 train_loss:3.6442 train_time:149370ms step_avg:151.80ms step:995/3200 train_loss:3.7328 train_time:149520ms step_avg:151.80ms step:996/3200 train_loss:3.6753 train_time:149670ms step_avg:151.79ms step:997/3200 train_loss:3.5885 train_time:149820ms step_avg:151.79ms step:998/3200 train_loss:3.9261 train_time:149970ms step_avg:151.79ms step:999/3200 train_loss:3.5966 train_time:150123ms step_avg:151.79ms step:1000/3200 train_loss:3.7261 train_time:150273ms step_avg:151.79ms step:1000/3200 val_loss:3.6163 train_time:150320ms step_avg:151.84ms step:1001/3200 train_loss:3.5892 train_time:150430ms step_avg:151.80ms step:1002/3200 train_loss:3.6404 train_time:150582ms step_avg:151.80ms step:1003/3200 train_loss:3.5251 train_time:150731ms step_avg:151.79ms step:1004/3200 train_loss:3.7090 train_time:150879ms step_avg:151.79ms step:1005/3200 train_loss:3.7585 train_time:151028ms step_avg:151.79ms step:1006/3200 train_loss:3.5312 train_time:151177ms step_avg:151.78ms step:1007/3200 train_loss:3.6142 train_time:151326ms step_avg:151.78ms step:1008/3200 train_loss:3.5808 train_time:151480ms step_avg:151.78ms step:1009/3200 train_loss:3.7010 train_time:151631ms step_avg:151.78ms step:1010/3200 train_loss:3.8057 train_time:151780ms step_avg:151.78ms step:1011/3200 train_loss:3.7016 train_time:151930ms step_avg:151.78ms step:1012/3200 train_loss:3.6628 train_time:152080ms step_avg:151.78ms step:1013/3200 train_loss:3.5260 train_time:152230ms step_avg:151.77ms step:1014/3200 train_loss:3.6677 train_time:152379ms step_avg:151.77ms step:1015/3200 train_loss:3.7742 train_time:152530ms step_avg:151.77ms step:1016/3200 train_loss:3.4870 train_time:152680ms step_avg:151.77ms step:1017/3200 
train_loss:3.5788 train_time:152829ms step_avg:151.77ms step:1018/3200 train_loss:3.5755 train_time:152979ms step_avg:151.76ms step:1019/3200 train_loss:3.5307 train_time:153129ms step_avg:151.76ms step:1020/3200 train_loss:3.6694 train_time:153279ms step_avg:151.76ms step:1021/3200 train_loss:3.5725 train_time:153430ms step_avg:151.76ms step:1022/3200 train_loss:3.5082 train_time:153580ms step_avg:151.76ms step:1023/3200 train_loss:3.6168 train_time:153731ms step_avg:151.76ms step:1024/3200 train_loss:3.6420 train_time:153880ms step_avg:151.76ms step:1025/3200 train_loss:3.6303 train_time:154031ms step_avg:151.76ms step:1026/3200 train_loss:3.6262 train_time:154180ms step_avg:151.75ms step:1027/3200 train_loss:3.8004 train_time:154330ms step_avg:151.75ms step:1028/3200 train_loss:3.4817 train_time:154480ms step_avg:151.75ms step:1029/3200 train_loss:3.5408 train_time:154631ms step_avg:151.75ms step:1030/3200 train_loss:3.4944 train_time:154781ms step_avg:151.75ms step:1031/3200 train_loss:3.6612 train_time:154931ms step_avg:151.74ms step:1032/3200 train_loss:3.6501 train_time:155080ms step_avg:151.74ms step:1033/3200 train_loss:3.8297 train_time:155229ms step_avg:151.74ms step:1034/3200 train_loss:3.6484 train_time:155380ms step_avg:151.74ms step:1035/3200 train_loss:3.5606 train_time:155529ms step_avg:151.74ms step:1036/3200 train_loss:3.5808 train_time:155680ms step_avg:151.74ms step:1037/3200 train_loss:3.6413 train_time:155830ms step_avg:151.73ms step:1038/3200 train_loss:3.9530 train_time:155980ms step_avg:151.73ms step:1039/3200 train_loss:3.7672 train_time:156130ms step_avg:151.73ms step:1040/3200 train_loss:3.6665 train_time:156280ms step_avg:151.73ms step:1041/3200 train_loss:3.5589 train_time:156430ms step_avg:151.73ms step:1042/3200 train_loss:3.6335 train_time:156580ms step_avg:151.73ms step:1043/3200 train_loss:3.6728 train_time:156730ms step_avg:151.72ms step:1044/3200 train_loss:3.5977 train_time:156879ms step_avg:151.72ms step:1045/3200 
train_loss:3.6052 train_time:157029ms step_avg:151.72ms step:1046/3200 train_loss:3.6840 train_time:157179ms step_avg:151.72ms step:1047/3200 train_loss:3.5932 train_time:157330ms step_avg:151.72ms step:1048/3200 train_loss:3.7924 train_time:157480ms step_avg:151.71ms step:1049/3200 train_loss:3.6539 train_time:157630ms step_avg:151.71ms step:1050/3200 train_loss:3.5690 train_time:157781ms step_avg:151.71ms step:1051/3200 train_loss:3.5377 train_time:157932ms step_avg:151.71ms step:1052/3200 train_loss:3.6613 train_time:158080ms step_avg:151.71ms step:1053/3200 train_loss:3.5426 train_time:158230ms step_avg:151.71ms step:1054/3200 train_loss:3.8615 train_time:158381ms step_avg:151.71ms step:1055/3200 train_loss:3.6968 train_time:158530ms step_avg:151.70ms step:1056/3200 train_loss:3.5544 train_time:158680ms step_avg:151.70ms step:1057/3200 train_loss:3.6527 train_time:158831ms step_avg:151.70ms step:1058/3200 train_loss:3.7277 train_time:158980ms step_avg:151.70ms step:1059/3200 train_loss:3.4547 train_time:159129ms step_avg:151.70ms step:1060/3200 train_loss:3.5780 train_time:159280ms step_avg:151.69ms step:1061/3200 train_loss:3.6043 train_time:159431ms step_avg:151.69ms step:1062/3200 train_loss:3.5692 train_time:159581ms step_avg:151.69ms step:1063/3200 train_loss:3.5451 train_time:159732ms step_avg:151.69ms step:1064/3200 train_loss:3.6370 train_time:159881ms step_avg:151.69ms step:1065/3200 train_loss:3.5398 train_time:160031ms step_avg:151.69ms step:1066/3200 train_loss:3.5309 train_time:160181ms step_avg:151.69ms step:1067/3200 train_loss:3.5549 train_time:160331ms step_avg:151.69ms step:1068/3200 train_loss:3.4668 train_time:160481ms step_avg:151.68ms step:1069/3200 train_loss:3.5811 train_time:160631ms step_avg:151.68ms step:1070/3200 train_loss:3.4483 train_time:160781ms step_avg:151.68ms step:1071/3200 train_loss:3.7160 train_time:160931ms step_avg:151.68ms step:1072/3200 train_loss:3.6594 train_time:161081ms step_avg:151.68ms step:1073/3200 
train_loss:3.6069 train_time:161231ms step_avg:151.68ms step:1074/3200 train_loss:3.6743 train_time:161380ms step_avg:151.67ms step:1075/3200 train_loss:3.6147 train_time:161532ms step_avg:151.67ms step:1076/3200 train_loss:3.5581 train_time:161681ms step_avg:151.67ms step:1077/3200 train_loss:3.9535 train_time:161833ms step_avg:151.67ms step:1078/3200 train_loss:3.6195 train_time:161982ms step_avg:151.67ms step:1079/3200 train_loss:3.3089 train_time:162133ms step_avg:151.67ms step:1080/3200 train_loss:3.6889 train_time:162282ms step_avg:151.67ms step:1081/3200 train_loss:3.6066 train_time:162434ms step_avg:151.67ms step:1082/3200 train_loss:3.6705 train_time:162583ms step_avg:151.66ms step:1083/3200 train_loss:3.7674 train_time:162735ms step_avg:151.66ms step:1084/3200 train_loss:3.6629 train_time:162884ms step_avg:151.66ms step:1085/3200 train_loss:3.6407 train_time:163034ms step_avg:151.66ms step:1086/3200 train_loss:3.6056 train_time:163184ms step_avg:151.66ms step:1087/3200 train_loss:3.7926 train_time:163335ms step_avg:151.66ms step:1088/3200 train_loss:3.6899 train_time:163486ms step_avg:151.66ms step:1089/3200 train_loss:3.5225 train_time:163638ms step_avg:151.66ms step:1090/3200 train_loss:3.5467 train_time:163789ms step_avg:151.66ms step:1091/3200 train_loss:3.6544 train_time:163939ms step_avg:151.66ms step:1092/3200 train_loss:3.4541 train_time:164090ms step_avg:151.65ms step:1093/3200 train_loss:3.6507 train_time:164240ms step_avg:151.65ms step:1094/3200 train_loss:3.7815 train_time:164389ms step_avg:151.65ms step:1095/3200 train_loss:3.6230 train_time:164540ms step_avg:151.65ms step:1096/3200 train_loss:3.5778 train_time:164690ms step_avg:151.65ms step:1097/3200 train_loss:3.5985 train_time:164840ms step_avg:151.65ms step:1098/3200 train_loss:3.6501 train_time:164991ms step_avg:151.65ms step:1099/3200 train_loss:3.7242 train_time:165141ms step_avg:151.64ms step:1100/3200 train_loss:3.6744 train_time:165292ms step_avg:151.64ms step:1101/3200 
train_loss:3.6086 train_time:165440ms step_avg:151.64ms step:1102/3200 train_loss:3.4667 train_time:165592ms step_avg:151.64ms step:1103/3200 train_loss:3.5261 train_time:165741ms step_avg:151.64ms step:1104/3200 train_loss:3.6135 train_time:165893ms step_avg:151.64ms step:1105/3200 train_loss:3.4925 train_time:166043ms step_avg:151.64ms step:1106/3200 train_loss:4.2412 train_time:166195ms step_avg:151.64ms step:1107/3200 train_loss:3.3979 train_time:166343ms step_avg:151.63ms step:1108/3200 train_loss:3.7348 train_time:166494ms step_avg:151.63ms step:1109/3200 train_loss:3.5211 train_time:166643ms step_avg:151.63ms step:1110/3200 train_loss:3.6690 train_time:166795ms step_avg:151.63ms step:1111/3200 train_loss:3.5977 train_time:166945ms step_avg:151.63ms step:1112/3200 train_loss:3.6410 train_time:167096ms step_avg:151.63ms step:1113/3200 train_loss:3.7322 train_time:167245ms step_avg:151.63ms step:1114/3200 train_loss:3.5956 train_time:167398ms step_avg:151.63ms step:1115/3200 train_loss:3.5377 train_time:167546ms step_avg:151.63ms step:1116/3200 train_loss:3.4348 train_time:167698ms step_avg:151.63ms step:1117/3200 train_loss:3.6040 train_time:167847ms step_avg:151.62ms step:1118/3200 train_loss:3.7588 train_time:167997ms step_avg:151.62ms step:1119/3200 train_loss:3.7917 train_time:168148ms step_avg:151.62ms step:1120/3200 train_loss:3.6327 train_time:168298ms step_avg:151.62ms step:1121/3200 train_loss:3.6632 train_time:168448ms step_avg:151.62ms step:1122/3200 train_loss:3.5592 train_time:168599ms step_avg:151.62ms step:1123/3200 train_loss:3.6220 train_time:168749ms step_avg:151.62ms step:1124/3200 train_loss:3.7548 train_time:168899ms step_avg:151.62ms step:1125/3200 train_loss:3.5281 train_time:169051ms step_avg:151.61ms step:1125/3200 val_loss:3.5892 train_time:169097ms step_avg:151.66ms step:1126/3200 train_loss:3.4286 train_time:169207ms step_avg:151.62ms step:1127/3200 train_loss:3.6501 train_time:169360ms step_avg:151.62ms step:1128/3200 
train_loss:3.8617 train_time:169512ms step_avg:151.62ms step:1129/3200 train_loss:3.4116 train_time:169659ms step_avg:151.62ms step:1130/3200 train_loss:3.7278 train_time:169809ms step_avg:151.62ms step:1131/3200 train_loss:3.5588 train_time:169957ms step_avg:151.61ms step:1132/3200 train_loss:3.5857 train_time:170108ms step_avg:151.61ms step:1133/3200 train_loss:3.5395 train_time:170261ms step_avg:151.61ms step:1134/3200 train_loss:3.6993 train_time:170559ms step_avg:151.74ms step:1135/3200 train_loss:3.6323 train_time:170716ms step_avg:151.75ms step:1136/3200 train_loss:3.6871 train_time:170866ms step_avg:151.75ms step:1137/3200 train_loss:3.7256 train_time:171014ms step_avg:151.74ms step:1138/3200 train_loss:3.6308 train_time:171163ms step_avg:151.74ms step:1139/3200 train_loss:3.5328 train_time:171312ms step_avg:151.74ms step:1140/3200 train_loss:3.8328 train_time:171630ms step_avg:151.89ms step:1141/3200 train_loss:3.6412 train_time:171783ms step_avg:151.89ms step:1142/3200 train_loss:3.7422 train_time:171932ms step_avg:151.88ms step:1143/3200 train_loss:3.6231 train_time:172080ms step_avg:151.88ms step:1144/3200 train_loss:3.5408 train_time:172229ms step_avg:151.88ms step:1145/3200 train_loss:3.6416 train_time:172377ms step_avg:151.87ms step:1146/3200 train_loss:3.7658 train_time:172528ms step_avg:151.87ms step:1147/3200 train_loss:3.7357 train_time:172682ms step_avg:151.88ms step:1148/3200 train_loss:3.6534 train_time:172833ms step_avg:151.87ms step:1149/3200 train_loss:3.6743 train_time:172983ms step_avg:151.87ms step:1150/3200 train_loss:3.5252 train_time:173132ms step_avg:151.87ms step:1151/3200 train_loss:3.5501 train_time:173281ms step_avg:151.87ms step:1152/3200 train_loss:3.5089 train_time:173432ms step_avg:151.87ms step:1153/3200 train_loss:3.6524 train_time:173581ms step_avg:151.86ms step:1154/3200 train_loss:3.6322 train_time:173734ms step_avg:151.87ms step:1155/3200 train_loss:3.6944 train_time:173884ms step_avg:151.86ms step:1156/3200 
train_loss:3.5383 train_time:174034ms step_avg:151.86ms step:1157/3200 train_loss:3.7142 train_time:174184ms step_avg:151.86ms step:1158/3200 train_loss:3.6620 train_time:174333ms step_avg:151.86ms step:1159/3200 train_loss:3.4821 train_time:174482ms step_avg:151.86ms step:1160/3200 train_loss:3.5256 train_time:174634ms step_avg:151.86ms step:1161/3200 train_loss:3.5078 train_time:174783ms step_avg:151.85ms step:1162/3200 train_loss:3.3171 train_time:174935ms step_avg:151.85ms step:1163/3200 train_loss:3.6195 train_time:175086ms step_avg:151.85ms step:1164/3200 train_loss:3.5854 train_time:175236ms step_avg:151.85ms step:1165/3200 train_loss:3.4570 train_time:175387ms step_avg:151.85ms step:1166/3200 train_loss:3.4498 train_time:175536ms step_avg:151.85ms step:1167/3200 train_loss:3.5599 train_time:175687ms step_avg:151.85ms step:1168/3200 train_loss:3.5716 train_time:175837ms step_avg:151.85ms step:1169/3200 train_loss:3.8940 train_time:175989ms step_avg:151.85ms step:1170/3200 train_loss:3.5682 train_time:176138ms step_avg:151.84ms step:1171/3200 train_loss:3.5760 train_time:176289ms step_avg:151.84ms step:1172/3200 train_loss:3.4976 train_time:176438ms step_avg:151.84ms step:1173/3200 train_loss:3.5910 train_time:176589ms step_avg:151.84ms step:1174/3200 train_loss:3.7218 train_time:176738ms step_avg:151.84ms step:1175/3200 train_loss:3.5622 train_time:176890ms step_avg:151.84ms step:1176/3200 train_loss:3.5802 train_time:177040ms step_avg:151.84ms step:1177/3200 train_loss:3.6365 train_time:177192ms step_avg:151.84ms step:1178/3200 train_loss:3.6160 train_time:177343ms step_avg:151.83ms step:1179/3200 train_loss:3.6752 train_time:177494ms step_avg:151.83ms step:1180/3200 train_loss:3.5792 train_time:177644ms step_avg:151.83ms step:1181/3200 train_loss:3.5836 train_time:177794ms step_avg:151.83ms step:1182/3200 train_loss:3.5334 train_time:177945ms step_avg:151.83ms step:1183/3200 train_loss:3.5874 train_time:178095ms step_avg:151.83ms step:1184/3200 
train_loss:3.5130 train_time:178246ms step_avg:151.83ms step:1185/3200 train_loss:3.6821 train_time:178395ms step_avg:151.83ms step:1186/3200 train_loss:3.7405 train_time:178546ms step_avg:151.83ms step:1187/3200 train_loss:3.5423 train_time:178696ms step_avg:151.82ms step:1188/3200 train_loss:3.5988 train_time:178847ms step_avg:151.82ms step:1189/3200 train_loss:3.6198 train_time:178996ms step_avg:151.82ms step:1190/3200 train_loss:3.4626 train_time:179148ms step_avg:151.82ms step:1191/3200 train_loss:3.6353 train_time:179297ms step_avg:151.82ms step:1192/3200 train_loss:3.7786 train_time:179449ms step_avg:151.82ms step:1193/3200 train_loss:3.5784 train_time:179599ms step_avg:151.82ms step:1194/3200 train_loss:3.4642 train_time:179751ms step_avg:151.82ms step:1195/3200 train_loss:3.7500 train_time:179899ms step_avg:151.81ms step:1196/3200 train_loss:3.5579 train_time:180051ms step_avg:151.81ms step:1197/3200 train_loss:3.5700 train_time:180200ms step_avg:151.81ms step:1198/3200 train_loss:3.4640 train_time:180353ms step_avg:151.81ms step:1199/3200 train_loss:3.4823 train_time:180502ms step_avg:151.81ms step:1200/3200 train_loss:3.5341 train_time:180654ms step_avg:151.81ms step:1201/3200 train_loss:3.6172 train_time:180803ms step_avg:151.81ms step:1202/3200 train_loss:3.6855 train_time:180954ms step_avg:151.81ms step:1203/3200 train_loss:3.7115 train_time:181104ms step_avg:151.81ms step:1204/3200 train_loss:3.5976 train_time:181254ms step_avg:151.80ms step:1205/3200 train_loss:3.5187 train_time:181403ms step_avg:151.80ms step:1206/3200 train_loss:3.6134 train_time:181554ms step_avg:151.80ms step:1207/3200 train_loss:3.6615 train_time:181704ms step_avg:151.80ms step:1208/3200 train_loss:3.7109 train_time:181855ms step_avg:151.80ms step:1209/3200 train_loss:3.5874 train_time:182005ms step_avg:151.80ms step:1210/3200 train_loss:3.4502 train_time:182155ms step_avg:151.80ms step:1211/3200 train_loss:3.4912 train_time:182305ms step_avg:151.79ms step:1212/3200 
train_loss:3.5860 train_time:182455ms step_avg:151.79ms step:1213/3200 train_loss:3.6100 train_time:182604ms step_avg:151.79ms step:1214/3200 train_loss:3.6353 train_time:182755ms step_avg:151.79ms step:1215/3200 train_loss:3.5200 train_time:182906ms step_avg:151.79ms step:1216/3200 train_loss:3.5876 train_time:183056ms step_avg:151.79ms step:1217/3200 train_loss:3.5335 train_time:183207ms step_avg:151.79ms step:1218/3200 train_loss:3.5238 train_time:183356ms step_avg:151.78ms step:1219/3200 train_loss:3.6186 train_time:183505ms step_avg:151.78ms step:1220/3200 train_loss:3.4684 train_time:183656ms step_avg:151.78ms step:1221/3200 train_loss:3.6823 train_time:183807ms step_avg:151.78ms step:1222/3200 train_loss:3.7069 train_time:183956ms step_avg:151.78ms step:1223/3200 train_loss:3.6326 train_time:184107ms step_avg:151.78ms step:1224/3200 train_loss:3.4913 train_time:184257ms step_avg:151.78ms step:1225/3200 train_loss:3.4729 train_time:184407ms step_avg:151.78ms step:1226/3200 train_loss:3.5590 train_time:184556ms step_avg:151.77ms step:1227/3200 train_loss:3.5378 train_time:184706ms step_avg:151.77ms step:1228/3200 train_loss:3.4844 train_time:184856ms step_avg:151.77ms step:1229/3200 train_loss:3.6479 train_time:185007ms step_avg:151.77ms step:1230/3200 train_loss:3.5709 train_time:185156ms step_avg:151.77ms step:1231/3200 train_loss:3.6285 train_time:185307ms step_avg:151.77ms step:1232/3200 train_loss:3.7828 train_time:185457ms step_avg:151.76ms step:1233/3200 train_loss:3.6822 train_time:185607ms step_avg:151.76ms step:1234/3200 train_loss:3.6175 train_time:185756ms step_avg:151.76ms step:1235/3200 train_loss:3.7745 train_time:185907ms step_avg:151.76ms step:1236/3200 train_loss:3.5321 train_time:186058ms step_avg:151.76ms step:1237/3200 train_loss:3.5023 train_time:186209ms step_avg:151.76ms step:1238/3200 train_loss:3.4508 train_time:186359ms step_avg:151.76ms step:1239/3200 train_loss:3.5253 train_time:186510ms step_avg:151.76ms step:1240/3200 
train_loss:3.5367 train_time:186659ms step_avg:151.75ms step:1241/3200 train_loss:3.5802 train_time:186810ms step_avg:151.75ms step:1242/3200 train_loss:3.6261 train_time:186958ms step_avg:151.75ms step:1243/3200 train_loss:3.4997 train_time:187110ms step_avg:151.75ms step:1244/3200 train_loss:3.6071 train_time:187259ms step_avg:151.75ms step:1245/3200 train_loss:3.6188 train_time:187411ms step_avg:151.75ms step:1246/3200 train_loss:3.6154 train_time:187560ms step_avg:151.75ms step:1247/3200 train_loss:3.4453 train_time:187713ms step_avg:151.75ms step:1248/3200 train_loss:3.5809 train_time:187862ms step_avg:151.75ms step:1249/3200 train_loss:3.6367 train_time:188013ms step_avg:151.75ms step:1250/3200 train_loss:3.6095 train_time:188163ms step_avg:151.74ms step:1250/3200 val_loss:3.5624 train_time:188210ms step_avg:151.78ms step:1251/3200 train_loss:3.5162 train_time:188321ms step_avg:151.75ms step:1252/3200 train_loss:3.7114 train_time:188475ms step_avg:151.75ms step:1253/3200 train_loss:3.5754 train_time:188624ms step_avg:151.75ms step:1254/3200 train_loss:3.5120 train_time:188773ms step_avg:151.75ms step:1255/3200 train_loss:3.6434 train_time:188922ms step_avg:151.74ms step:1256/3200 train_loss:3.7065 train_time:189072ms step_avg:151.74ms step:1257/3200 train_loss:3.5127 train_time:189221ms step_avg:151.74ms step:1258/3200 train_loss:3.5563 train_time:189375ms step_avg:151.74ms step:1259/3200 train_loss:3.5810 train_time:189526ms step_avg:151.74ms step:1260/3200 train_loss:3.5465 train_time:189677ms step_avg:151.74ms step:1261/3200 train_loss:3.4063 train_time:189826ms step_avg:151.74ms step:1262/3200 train_loss:3.5089 train_time:189976ms step_avg:151.74ms step:1263/3200 train_loss:3.5731 train_time:190125ms step_avg:151.74ms step:1264/3200 train_loss:3.4282 train_time:190276ms step_avg:151.74ms step:1265/3200 train_loss:3.6449 train_time:190427ms step_avg:151.73ms step:1266/3200 train_loss:3.6226 train_time:190579ms step_avg:151.74ms step:1267/3200 
train_loss:3.6313 train_time:190729ms step_avg:151.73ms step:1268/3200 train_loss:3.5726 train_time:190880ms step_avg:151.73ms step:1269/3200 train_loss:3.6077 train_time:191028ms step_avg:151.73ms step:1270/3200 train_loss:3.4627 train_time:191178ms step_avg:151.73ms step:1271/3200 train_loss:3.3144 train_time:191328ms step_avg:151.73ms step:1272/3200 train_loss:3.5912 train_time:191480ms step_avg:151.73ms step:1273/3200 train_loss:3.5541 train_time:191629ms step_avg:151.73ms step:1274/3200 train_loss:3.6124 train_time:191781ms step_avg:151.73ms step:1275/3200 train_loss:3.5598 train_time:191929ms step_avg:151.72ms step:1276/3200 train_loss:3.6478 train_time:192081ms step_avg:151.72ms step:1277/3200 train_loss:3.6689 train_time:192230ms step_avg:151.72ms step:1278/3200 train_loss:3.6308 train_time:192382ms step_avg:151.72ms step:1279/3200 train_loss:3.6263 train_time:192531ms step_avg:151.72ms step:1280/3200 train_loss:3.4557 train_time:192684ms step_avg:151.72ms step:1281/3200 train_loss:3.5735 train_time:192834ms step_avg:151.72ms step:1282/3200 train_loss:3.6316 train_time:192984ms step_avg:151.72ms step:1283/3200 train_loss:3.6694 train_time:193134ms step_avg:151.72ms step:1284/3200 train_loss:3.5587 train_time:193284ms step_avg:151.71ms step:1285/3200 train_loss:3.5808 train_time:193433ms step_avg:151.71ms step:1286/3200 train_loss:3.5694 train_time:193585ms step_avg:151.71ms step:1287/3200 train_loss:3.5449 train_time:193735ms step_avg:151.71ms step:1288/3200 train_loss:3.6799 train_time:193886ms step_avg:151.71ms step:1289/3200 train_loss:3.5125 train_time:194036ms step_avg:151.71ms step:1290/3200 train_loss:3.5975 train_time:194186ms step_avg:151.71ms step:1291/3200 train_loss:3.6703 train_time:194336ms step_avg:151.71ms step:1292/3200 train_loss:3.5974 train_time:194486ms step_avg:151.71ms step:1293/3200 train_loss:3.7014 train_time:194637ms step_avg:151.70ms step:1294/3200 train_loss:3.7179 train_time:194786ms step_avg:151.70ms step:1295/3200 
train_loss:3.6915 train_time:194937ms step_avg:151.70ms step:1296/3200 train_loss:3.4953 train_time:195086ms step_avg:151.70ms step:1297/3200 train_loss:3.5681 train_time:195236ms step_avg:151.70ms step:1298/3200 train_loss:3.4711 train_time:195386ms step_avg:151.70ms step:1299/3200 train_loss:3.5346 train_time:195536ms step_avg:151.70ms step:1300/3200 train_loss:3.6099 train_time:195686ms step_avg:151.69ms step:1301/3200 train_loss:3.6123 train_time:195837ms step_avg:151.69ms step:1302/3200 train_loss:3.6165 train_time:195987ms step_avg:151.69ms step:1303/3200 train_loss:3.7718 train_time:196138ms step_avg:151.69ms step:1304/3200 train_loss:3.5446 train_time:196287ms step_avg:151.69ms step:1305/3200 train_loss:3.7429 train_time:196437ms step_avg:151.69ms step:1306/3200 train_loss:3.4770 train_time:196586ms step_avg:151.69ms step:1307/3200 train_loss:3.6717 train_time:196737ms step_avg:151.69ms step:1308/3200 train_loss:3.6664 train_time:196887ms step_avg:151.68ms step:1309/3200 train_loss:3.5270 train_time:197039ms step_avg:151.68ms step:1310/3200 train_loss:3.5007 train_time:197189ms step_avg:151.68ms step:1311/3200 train_loss:3.5408 train_time:197340ms step_avg:151.68ms step:1312/3200 train_loss:3.4990 train_time:197488ms step_avg:151.68ms step:1313/3200 train_loss:3.6115 train_time:197640ms step_avg:151.68ms step:1314/3200 train_loss:3.5629 train_time:197789ms step_avg:151.68ms step:1315/3200 train_loss:3.2790 train_time:197941ms step_avg:151.68ms step:1316/3200 train_loss:3.5057 train_time:198091ms step_avg:151.68ms step:1317/3200 train_loss:3.5935 train_time:198243ms step_avg:151.68ms step:1318/3200 train_loss:3.6185 train_time:198391ms step_avg:151.68ms step:1319/3200 train_loss:3.4997 train_time:198543ms step_avg:151.68ms step:1320/3200 train_loss:3.6307 train_time:198693ms step_avg:151.67ms step:1321/3200 train_loss:3.6830 train_time:198844ms step_avg:151.67ms step:1322/3200 train_loss:3.5742 train_time:198993ms step_avg:151.67ms step:1323/3200 
train_loss:3.5177 train_time:199292ms step_avg:151.78ms step:1324/3200 train_loss:3.5513 train_time:199450ms step_avg:151.79ms step:1325/3200 train_loss:3.6496 train_time:199599ms step_avg:151.79ms step:1326/3200 train_loss:3.7013 train_time:199747ms step_avg:151.78ms step:1327/3200 train_loss:3.4565 train_time:199896ms step_avg:151.78ms step:1328/3200 train_loss:3.3804 train_time:200044ms step_avg:151.78ms step:1329/3200 train_loss:3.6859 train_time:200194ms step_avg:151.78ms step:1330/3200 train_loss:3.5274 train_time:200509ms step_avg:151.90ms step:1331/3200 train_loss:3.6561 train_time:200657ms step_avg:151.90ms step:1332/3200 train_loss:3.5567 train_time:200806ms step_avg:151.90ms step:1333/3200 train_loss:3.9713 train_time:200954ms step_avg:151.89ms step:1334/3200 train_loss:3.6625 train_time:201103ms step_avg:151.89ms step:1335/3200 train_loss:3.5785 train_time:201251ms step_avg:151.89ms step:1336/3200 train_loss:3.5187 train_time:201404ms step_avg:151.89ms step:1337/3200 train_loss:3.5166 train_time:201557ms step_avg:151.89ms step:1338/3200 train_loss:3.7666 train_time:201707ms step_avg:151.89ms step:1339/3200 train_loss:3.7123 train_time:201857ms step_avg:151.89ms step:1340/3200 train_loss:3.5567 train_time:202005ms step_avg:151.88ms step:1341/3200 train_loss:3.5097 train_time:202154ms step_avg:151.88ms step:1342/3200 train_loss:3.8120 train_time:202304ms step_avg:151.88ms step:1343/3200 train_loss:3.5852 train_time:202454ms step_avg:151.88ms step:1344/3200 train_loss:3.5800 train_time:202608ms step_avg:151.88ms step:1345/3200 train_loss:3.6388 train_time:202759ms step_avg:151.88ms step:1346/3200 train_loss:3.6059 train_time:202909ms step_avg:151.88ms step:1347/3200 train_loss:3.5091 train_time:203059ms step_avg:151.88ms step:1348/3200 train_loss:3.4635 train_time:203208ms step_avg:151.87ms step:1349/3200 train_loss:3.5575 train_time:203359ms step_avg:151.87ms step:1350/3200 train_loss:3.4820 train_time:203510ms step_avg:151.87ms step:1351/3200 
train_loss:3.6179 train_time:203662ms step_avg:151.87ms step:1352/3200 train_loss:3.4727 train_time:203812ms step_avg:151.87ms step:1353/3200 train_loss:3.5278 train_time:203964ms step_avg:151.87ms step:1354/3200 train_loss:3.6285 train_time:204113ms step_avg:151.87ms step:1355/3200 train_loss:3.4778 train_time:204263ms step_avg:151.87ms step:1356/3200 train_loss:3.3992 train_time:204412ms step_avg:151.87ms step:1357/3200 train_loss:3.7396 train_time:204563ms step_avg:151.87ms step:1358/3200 train_loss:3.6667 train_time:204714ms step_avg:151.86ms step:1359/3200 train_loss:3.3963 train_time:204864ms step_avg:151.86ms step:1360/3200 train_loss:3.6702 train_time:205013ms step_avg:151.86ms step:1361/3200 train_loss:3.5612 train_time:205162ms step_avg:151.86ms step:1362/3200 train_loss:3.4222 train_time:205311ms step_avg:151.86ms step:1363/3200 train_loss:3.6028 train_time:205463ms step_avg:151.86ms step:1364/3200 train_loss:3.4957 train_time:205614ms step_avg:151.86ms step:1365/3200 train_loss:3.5138 train_time:205765ms step_avg:151.86ms step:1366/3200 train_loss:3.5358 train_time:205917ms step_avg:151.86ms step:1367/3200 train_loss:3.6360 train_time:206066ms step_avg:151.85ms step:1368/3200 train_loss:3.6200 train_time:206215ms step_avg:151.85ms step:1369/3200 train_loss:3.5770 train_time:206365ms step_avg:151.85ms step:1370/3200 train_loss:3.4916 train_time:206515ms step_avg:151.85ms step:1371/3200 train_loss:3.8062 train_time:206665ms step_avg:151.85ms step:1372/3200 train_loss:3.5459 train_time:206817ms step_avg:151.85ms step:1373/3200 train_loss:3.5856 train_time:206966ms step_avg:151.85ms step:1374/3200 train_loss:3.5840 train_time:207117ms step_avg:151.85ms step:1375/3200 train_loss:3.3844 train_time:207265ms step_avg:151.84ms step:1375/3200 val_loss:3.5420 train_time:207312ms step_avg:151.88ms step:1376/3200 train_loss:3.7787 train_time:207422ms step_avg:151.85ms step:1377/3200 train_loss:3.5626 train_time:207576ms step_avg:151.85ms step:1378/3200 
train_loss:3.7067 train_time:207724ms step_avg:151.85ms step:1379/3200 train_loss:3.7408 train_time:207874ms step_avg:151.84ms step:1380/3200 train_loss:3.4115 train_time:208023ms step_avg:151.84ms step:1381/3200 train_loss:3.5502 train_time:208173ms step_avg:151.84ms step:1382/3200 train_loss:4.0074 train_time:208322ms step_avg:151.84ms step:1383/3200 train_loss:3.4566 train_time:208476ms step_avg:151.84ms step:1384/3200 train_loss:3.6121 train_time:208627ms step_avg:151.84ms step:1385/3200 train_loss:3.6962 train_time:208777ms step_avg:151.84ms step:1386/3200 train_loss:3.6125 train_time:208927ms step_avg:151.84ms step:1387/3200 train_loss:3.6024 train_time:209076ms step_avg:151.83ms step:1388/3200 train_loss:3.4258 train_time:209226ms step_avg:151.83ms step:1389/3200 train_loss:3.5676 train_time:209376ms step_avg:151.83ms step:1390/3200 train_loss:3.5380 train_time:209527ms step_avg:151.83ms step:1391/3200 train_loss:3.7990 train_time:209677ms step_avg:151.83ms step:1392/3200 train_loss:3.5161 train_time:209827ms step_avg:151.83ms step:1393/3200 train_loss:3.5108 train_time:209977ms step_avg:151.83ms step:1394/3200 train_loss:3.4703 train_time:210126ms step_avg:151.83ms step:1395/3200 train_loss:3.7551 train_time:210276ms step_avg:151.82ms step:1396/3200 train_loss:3.6489 train_time:210426ms step_avg:151.82ms step:1397/3200 train_loss:3.6542 train_time:210576ms step_avg:151.82ms step:1398/3200 train_loss:3.5240 train_time:210728ms step_avg:151.82ms step:1399/3200 train_loss:3.4975 train_time:210878ms step_avg:151.82ms step:1400/3200 train_loss:3.5522 train_time:211028ms step_avg:151.82ms step:1401/3200 train_loss:3.5285 train_time:211178ms step_avg:151.82ms step:1402/3200 train_loss:3.5602 train_time:211329ms step_avg:151.82ms step:1403/3200 train_loss:3.5257 train_time:211478ms step_avg:151.81ms step:1404/3200 train_loss:3.7559 train_time:211628ms step_avg:151.81ms step:1405/3200 train_loss:3.4987 train_time:211777ms step_avg:151.81ms step:1406/3200 
train_loss:3.5468 train_time:211928ms step_avg:151.81ms step:1407/3200 train_loss:3.5364 train_time:212078ms step_avg:151.81ms step:1408/3200 train_loss:3.4086 train_time:212228ms step_avg:151.81ms step:1409/3200 train_loss:3.5270 train_time:212377ms step_avg:151.81ms step:1410/3200 train_loss:3.5086 train_time:212527ms step_avg:151.80ms step:1411/3200 train_loss:3.5058 train_time:212677ms step_avg:151.80ms step:1412/3200 train_loss:3.5938 train_time:212826ms step_avg:151.80ms step:1413/3200 train_loss:3.5337 train_time:212977ms step_avg:151.80ms step:1414/3200 train_loss:3.5839 train_time:213127ms step_avg:151.80ms step:1415/3200 train_loss:3.5719 train_time:213277ms step_avg:151.80ms step:1416/3200 train_loss:3.6485 train_time:213428ms step_avg:151.80ms step:1417/3200 train_loss:3.4508 train_time:213577ms step_avg:151.80ms step:1418/3200 train_loss:3.5100 train_time:213727ms step_avg:151.79ms step:1419/3200 train_loss:3.6098 train_time:213877ms step_avg:151.79ms step:1420/3200 train_loss:3.6331 train_time:214027ms step_avg:151.79ms step:1421/3200 train_loss:3.6149 train_time:214178ms step_avg:151.79ms step:1422/3200 train_loss:3.6029 train_time:214329ms step_avg:151.79ms step:1423/3200 train_loss:3.5931 train_time:214478ms step_avg:151.79ms step:1424/3200 train_loss:3.5622 train_time:214628ms step_avg:151.79ms step:1425/3200 train_loss:3.5680 train_time:214777ms step_avg:151.79ms step:1426/3200 train_loss:3.4475 train_time:214928ms step_avg:151.79ms step:1427/3200 train_loss:3.5479 train_time:215077ms step_avg:151.78ms step:1428/3200 train_loss:3.4934 train_time:215227ms step_avg:151.78ms step:1429/3200 train_loss:3.6121 train_time:215378ms step_avg:151.78ms step:1430/3200 train_loss:3.5665 train_time:215528ms step_avg:151.78ms step:1431/3200 train_loss:3.5056 train_time:215678ms step_avg:151.78ms step:1432/3200 train_loss:3.5498 train_time:215828ms step_avg:151.78ms step:1433/3200 train_loss:3.5802 train_time:215977ms step_avg:151.78ms step:1434/3200 
train_loss:3.4486 train_time:216128ms step_avg:151.78ms step:1435/3200 train_loss:3.5550 train_time:216278ms step_avg:151.77ms step:1436/3200 train_loss:3.3753 train_time:216428ms step_avg:151.77ms step:1437/3200 train_loss:3.4434 train_time:216578ms step_avg:151.77ms step:1438/3200 train_loss:3.6412 train_time:216729ms step_avg:151.77ms step:1439/3200 train_loss:3.6001 train_time:216878ms step_avg:151.77ms step:1440/3200 train_loss:3.5459 train_time:217029ms step_avg:151.77ms step:1441/3200 train_loss:3.4070 train_time:217179ms step_avg:151.77ms step:1442/3200 train_loss:3.5677 train_time:217329ms step_avg:151.77ms step:1443/3200 train_loss:3.6389 train_time:217478ms step_avg:151.76ms step:1444/3200 train_loss:3.7150 train_time:217630ms step_avg:151.76ms step:1445/3200 train_loss:3.6742 train_time:217779ms step_avg:151.76ms step:1446/3200 train_loss:3.5631 train_time:217931ms step_avg:151.76ms step:1447/3200 train_loss:3.4327 train_time:218079ms step_avg:151.76ms step:1448/3200 train_loss:3.5092 train_time:218231ms step_avg:151.76ms step:1449/3200 train_loss:3.5275 train_time:218380ms step_avg:151.76ms step:1450/3200 train_loss:3.6452 train_time:218530ms step_avg:151.76ms step:1451/3200 train_loss:3.6296 train_time:218680ms step_avg:151.76ms step:1452/3200 train_loss:3.4489 train_time:218830ms step_avg:151.75ms step:1453/3200 train_loss:3.5654 train_time:218980ms step_avg:151.75ms step:1454/3200 train_loss:3.4806 train_time:219131ms step_avg:151.75ms step:1455/3200 train_loss:3.5121 train_time:219280ms step_avg:151.75ms step:1456/3200 train_loss:3.5635 train_time:219431ms step_avg:151.75ms step:1457/3200 train_loss:3.4939 train_time:219581ms step_avg:151.75ms step:1458/3200 train_loss:3.3870 train_time:219732ms step_avg:151.75ms step:1459/3200 train_loss:3.6382 train_time:219881ms step_avg:151.75ms step:1460/3200 train_loss:3.5012 train_time:220034ms step_avg:151.75ms step:1461/3200 train_loss:3.5513 train_time:220183ms step_avg:151.75ms step:1462/3200 
train_loss:3.6747 train_time:220335ms step_avg:151.75ms step:1463/3200 train_loss:3.4969 train_time:220485ms step_avg:151.74ms step:1464/3200 train_loss:3.6909 train_time:220636ms step_avg:151.74ms step:1465/3200 train_loss:3.5825 train_time:220786ms step_avg:151.74ms step:1466/3200 train_loss:3.5892 train_time:220936ms step_avg:151.74ms step:1467/3200 train_loss:3.5035 train_time:221086ms step_avg:151.74ms step:1468/3200 train_loss:3.6651 train_time:221236ms step_avg:151.74ms step:1469/3200 train_loss:3.5300 train_time:221386ms step_avg:151.74ms step:1470/3200 train_loss:3.5001 train_time:221537ms step_avg:151.74ms step:1471/3200 train_loss:3.5530 train_time:221687ms step_avg:151.74ms step:1472/3200 train_loss:3.4774 train_time:221837ms step_avg:151.74ms step:1473/3200 train_loss:3.5672 train_time:221987ms step_avg:151.73ms step:1474/3200 train_loss:3.6598 train_time:222137ms step_avg:151.73ms step:1475/3200 train_loss:3.5387 train_time:222288ms step_avg:151.73ms step:1476/3200 train_loss:3.3691 train_time:222437ms step_avg:151.73ms step:1477/3200 train_loss:3.4899 train_time:222589ms step_avg:151.73ms step:1478/3200 train_loss:3.4685 train_time:222739ms step_avg:151.73ms step:1479/3200 train_loss:3.5442 train_time:222891ms step_avg:151.73ms step:1480/3200 train_loss:3.6302 train_time:223040ms step_avg:151.73ms step:1481/3200 train_loss:3.4973 train_time:223191ms step_avg:151.73ms step:1482/3200 train_loss:3.6699 train_time:223340ms step_avg:151.73ms step:1483/3200 train_loss:3.5972 train_time:223491ms step_avg:151.73ms step:1484/3200 train_loss:3.5041 train_time:223640ms step_avg:151.72ms step:1485/3200 train_loss:3.5011 train_time:223792ms step_avg:151.72ms step:1486/3200 train_loss:3.4929 train_time:223942ms step_avg:151.72ms step:1487/3200 train_loss:3.4688 train_time:224094ms step_avg:151.72ms step:1488/3200 train_loss:3.5554 train_time:224243ms step_avg:151.72ms step:1489/3200 train_loss:3.4627 train_time:224395ms step_avg:151.72ms step:1490/3200 
train_loss:3.5521 train_time:224545ms step_avg:151.72ms step:1491/3200 train_loss:3.4926 train_time:224696ms step_avg:151.72ms step:1492/3200 train_loss:3.4118 train_time:224847ms step_avg:151.72ms step:1493/3200 train_loss:3.4903 train_time:224997ms step_avg:151.72ms step:1494/3200 train_loss:3.6631 train_time:225148ms step_avg:151.72ms step:1495/3200 train_loss:3.5142 train_time:225297ms step_avg:151.72ms step:1496/3200 train_loss:3.2838 train_time:225447ms step_avg:151.71ms step:1497/3200 train_loss:3.5811 train_time:225596ms step_avg:151.71ms step:1498/3200 train_loss:3.5364 train_time:225747ms step_avg:151.71ms step:1499/3200 train_loss:3.5904 train_time:225897ms step_avg:151.71ms step:1500/3200 train_loss:3.5401 train_time:226047ms step_avg:151.71ms step:1500/3200 val_loss:3.5215 train_time:226093ms step_avg:151.74ms step:1501/3200 train_loss:3.5246 train_time:226204ms step_avg:151.71ms step:1502/3200 train_loss:3.3180 train_time:226356ms step_avg:151.71ms step:1503/3200 train_loss:3.5914 train_time:226504ms step_avg:151.71ms step:1504/3200 train_loss:3.4716 train_time:226653ms step_avg:151.71ms step:1505/3200 train_loss:3.4776 train_time:226802ms step_avg:151.71ms step:1506/3200 train_loss:3.4415 train_time:226951ms step_avg:151.71ms step:1507/3200 train_loss:3.5235 train_time:227102ms step_avg:151.70ms step:1508/3200 train_loss:3.4441 train_time:227253ms step_avg:151.70ms step:1509/3200 train_loss:3.7486 train_time:227404ms step_avg:151.70ms step:1510/3200 train_loss:3.4958 train_time:227554ms step_avg:151.70ms step:1511/3200 train_loss:3.4991 train_time:227703ms step_avg:151.70ms step:1512/3200 train_loss:3.6247 train_time:227995ms step_avg:151.79ms step:1513/3200 train_loss:3.6532 train_time:228154ms step_avg:151.80ms step:1514/3200 train_loss:3.5097 train_time:228302ms step_avg:151.80ms step:1515/3200 train_loss:3.3412 train_time:228450ms step_avg:151.79ms step:1516/3200 train_loss:3.4694 train_time:228599ms step_avg:151.79ms step:1517/3200 
train_loss:3.4801 train_time:228747ms step_avg:151.79ms step:1518/3200 train_loss:3.5548 train_time:228898ms step_avg:151.79ms step:1519/3200 train_loss:3.4430 train_time:229053ms step_avg:151.79ms step:1520/3200 train_loss:3.7362 train_time:229364ms step_avg:151.90ms step:1521/3200 train_loss:3.3979 train_time:229516ms step_avg:151.90ms step:1522/3200 train_loss:3.4555 train_time:229664ms step_avg:151.89ms step:1523/3200 train_loss:3.6039 train_time:229814ms step_avg:151.89ms step:1524/3200 train_loss:3.4603 train_time:229962ms step_avg:151.89ms step:1525/3200 train_loss:3.5618 train_time:230111ms step_avg:151.89ms step:1526/3200 train_loss:3.5537 train_time:230262ms step_avg:151.89ms step:1527/3200 train_loss:3.5130 train_time:230416ms step_avg:151.89ms step:1528/3200 train_loss:3.5176 train_time:230567ms step_avg:151.89ms step:1529/3200 train_loss:3.6599 train_time:230718ms step_avg:151.89ms step:1530/3200 train_loss:3.6324 train_time:230866ms step_avg:151.89ms step:1531/3200 train_loss:3.4651 train_time:231017ms step_avg:151.88ms step:1532/3200 train_loss:3.4352 train_time:231165ms step_avg:151.88ms step:1533/3200 train_loss:3.5756 train_time:231315ms step_avg:151.88ms step:1534/3200 train_loss:3.5316 train_time:231466ms step_avg:151.88ms step:1535/3200 train_loss:3.5213 train_time:231618ms step_avg:151.88ms step:1536/3200 train_loss:3.5173 train_time:231768ms step_avg:151.88ms step:1537/3200 train_loss:3.4568 train_time:231919ms step_avg:151.88ms step:1538/3200 train_loss:3.5138 train_time:232068ms step_avg:151.88ms step:1539/3200 train_loss:3.6875 train_time:232218ms step_avg:151.88ms step:1540/3200 train_loss:3.6205 train_time:232369ms step_avg:151.87ms step:1541/3200 train_loss:3.5275 train_time:232523ms step_avg:151.88ms step:1542/3200 train_loss:3.4821 train_time:232674ms step_avg:151.88ms step:1543/3200 train_loss:3.4844 train_time:232824ms step_avg:151.87ms step:1544/3200 train_loss:3.4406 train_time:232974ms step_avg:151.87ms step:1545/3200 
train_loss:3.5344 train_time:233122ms step_avg:151.87ms step:1546/3200 train_loss:3.4985 train_time:233272ms step_avg:151.87ms step:1547/3200 train_loss:3.4811 train_time:233422ms step_avg:151.87ms step:1548/3200 train_loss:3.4386 train_time:233572ms step_avg:151.87ms step:1549/3200 train_loss:3.4814 train_time:233723ms step_avg:151.87ms step:1550/3200 train_loss:3.5928 train_time:233874ms step_avg:151.87ms step:1551/3200 train_loss:3.5154 train_time:234023ms step_avg:151.86ms step:1552/3200 train_loss:3.4541 train_time:234174ms step_avg:151.86ms step:1553/3200 train_loss:3.4510 train_time:234324ms step_avg:151.86ms step:1554/3200 train_loss:3.4429 train_time:234476ms step_avg:151.86ms step:1555/3200 train_loss:3.5729 train_time:234626ms step_avg:151.86ms step:1556/3200 train_loss:3.5728 train_time:234777ms step_avg:151.86ms step:1557/3200 train_loss:3.5072 train_time:234926ms step_avg:151.86ms step:1558/3200 train_loss:3.5574 train_time:235077ms step_avg:151.86ms step:1559/3200 train_loss:3.4881 train_time:235225ms step_avg:151.86ms step:1560/3200 train_loss:3.3934 train_time:235375ms step_avg:151.85ms step:1561/3200 train_loss:3.6466 train_time:235524ms step_avg:151.85ms step:1562/3200 train_loss:3.4609 train_time:235677ms step_avg:151.85ms step:1563/3200 train_loss:3.4446 train_time:235826ms step_avg:151.85ms step:1564/3200 train_loss:3.5683 train_time:235977ms step_avg:151.85ms step:1565/3200 train_loss:3.3959 train_time:236126ms step_avg:151.85ms step:1566/3200 train_loss:3.4499 train_time:236275ms step_avg:151.85ms step:1567/3200 train_loss:3.6021 train_time:236425ms step_avg:151.85ms step:1568/3200 train_loss:3.4786 train_time:236577ms step_avg:151.85ms step:1569/3200 train_loss:3.4649 train_time:236727ms step_avg:151.85ms step:1570/3200 train_loss:3.5645 train_time:236879ms step_avg:151.85ms step:1571/3200 train_loss:3.5748 train_time:237030ms step_avg:151.85ms step:1572/3200 train_loss:3.3989 train_time:237181ms step_avg:151.84ms step:1573/3200 
train_loss:3.4302 train_time:237333ms step_avg:151.84ms step:1574/3200 train_loss:3.5464 train_time:237484ms step_avg:151.84ms step:1575/3200 train_loss:3.4140 train_time:237635ms step_avg:151.84ms step:1576/3200 train_loss:3.5619 train_time:237785ms step_avg:151.84ms step:1577/3200 train_loss:3.4671 train_time:237935ms step_avg:151.84ms step:1578/3200 train_loss:3.5169 train_time:238085ms step_avg:151.84ms step:1579/3200 train_loss:3.4958 train_time:238236ms step_avg:151.84ms step:1580/3200 train_loss:3.4629 train_time:238385ms step_avg:151.84ms step:1581/3200 train_loss:3.4407 train_time:238537ms step_avg:151.84ms step:1582/3200 train_loss:3.6816 train_time:238686ms step_avg:151.84ms step:1583/3200 train_loss:3.4525 train_time:238838ms step_avg:151.84ms step:1584/3200 train_loss:3.6079 train_time:238987ms step_avg:151.83ms step:1585/3200 train_loss:3.4408 train_time:239138ms step_avg:151.83ms step:1586/3200 train_loss:3.6004 train_time:239286ms step_avg:151.83ms step:1587/3200 train_loss:3.3811 train_time:239437ms step_avg:151.83ms step:1588/3200 train_loss:3.5804 train_time:239586ms step_avg:151.83ms step:1589/3200 train_loss:3.4866 train_time:239738ms step_avg:151.83ms step:1590/3200 train_loss:3.6438 train_time:239887ms step_avg:151.83ms step:1591/3200 train_loss:3.4589 train_time:240039ms step_avg:151.83ms step:1592/3200 train_loss:3.4786 train_time:240188ms step_avg:151.83ms step:1593/3200 train_loss:3.5492 train_time:240340ms step_avg:151.83ms step:1594/3200 train_loss:3.5199 train_time:240488ms step_avg:151.82ms step:1595/3200 train_loss:3.4960 train_time:240641ms step_avg:151.82ms step:1596/3200 train_loss:3.6396 train_time:240791ms step_avg:151.82ms step:1597/3200 train_loss:3.3682 train_time:240942ms step_avg:151.82ms step:1598/3200 train_loss:3.5335 train_time:241091ms step_avg:151.82ms step:1599/3200 train_loss:3.5758 train_time:241244ms step_avg:151.82ms step:1600/3200 train_loss:3.6223 train_time:241394ms step_avg:151.82ms step:1601/3200 
train_loss:3.4763 train_time:241544ms step_avg:151.82ms step:1602/3200 train_loss:3.7682 train_time:241693ms step_avg:151.82ms step:1603/3200 train_loss:3.6497 train_time:241843ms step_avg:151.82ms step:1604/3200 train_loss:3.4303 train_time:241994ms step_avg:151.82ms step:1605/3200 train_loss:3.4666 train_time:242144ms step_avg:151.81ms step:1606/3200 train_loss:3.3537 train_time:242294ms step_avg:151.81ms step:1607/3200 train_loss:3.6863 train_time:242444ms step_avg:151.81ms step:1608/3200 train_loss:3.4809 train_time:242595ms step_avg:151.81ms step:1609/3200 train_loss:3.5058 train_time:242745ms step_avg:151.81ms step:1610/3200 train_loss:3.4578 train_time:242895ms step_avg:151.81ms step:1611/3200 train_loss:4.0614 train_time:243045ms step_avg:151.81ms step:1612/3200 train_loss:3.6889 train_time:243196ms step_avg:151.81ms step:1613/3200 train_loss:3.5981 train_time:243345ms step_avg:151.81ms step:1614/3200 train_loss:3.4608 train_time:243495ms step_avg:151.80ms step:1615/3200 train_loss:3.5118 train_time:243645ms step_avg:151.80ms step:1616/3200 train_loss:3.5051 train_time:243796ms step_avg:151.80ms step:1617/3200 train_loss:3.4708 train_time:243946ms step_avg:151.80ms step:1618/3200 train_loss:3.5487 train_time:244098ms step_avg:151.80ms step:1619/3200 train_loss:3.5014 train_time:244248ms step_avg:151.80ms step:1620/3200 train_loss:3.3844 train_time:244400ms step_avg:151.80ms step:1621/3200 train_loss:3.6534 train_time:244549ms step_avg:151.80ms step:1622/3200 train_loss:3.5722 train_time:244701ms step_avg:151.80ms step:1623/3200 train_loss:3.3554 train_time:244852ms step_avg:151.80ms step:1624/3200 train_loss:3.4760 train_time:245002ms step_avg:151.80ms step:1625/3200 train_loss:3.4283 train_time:245152ms step_avg:151.80ms step:1625/3200 val_loss:3.5053 train_time:245199ms step_avg:151.83ms step:1626/3200 train_loss:3.5148 train_time:245308ms step_avg:151.80ms step:1627/3200 train_loss:3.4724 train_time:245461ms step_avg:151.80ms step:1628/3200 
train_loss:3.4405 train_time:245609ms step_avg:151.80ms step:1629/3200 train_loss:3.5469 train_time:245758ms step_avg:151.80ms step:1630/3200 train_loss:3.4451 train_time:245905ms step_avg:151.79ms step:1631/3200 train_loss:3.5018 train_time:246055ms step_avg:151.79ms step:1632/3200 train_loss:3.3850 train_time:246205ms step_avg:151.79ms step:1633/3200 train_loss:3.3501 train_time:246359ms step_avg:151.79ms step:1634/3200 train_loss:3.5123 train_time:246509ms step_avg:151.79ms step:1635/3200 train_loss:3.4995 train_time:246660ms step_avg:151.79ms step:1636/3200 train_loss:3.4409 train_time:246809ms step_avg:151.79ms step:1637/3200 train_loss:3.5242 train_time:246958ms step_avg:151.79ms step:1638/3200 train_loss:3.5700 train_time:247106ms step_avg:151.79ms step:1639/3200 train_loss:3.6102 train_time:247259ms step_avg:151.79ms step:1640/3200 train_loss:3.7681 train_time:247410ms step_avg:151.79ms step:1641/3200 train_loss:3.5826 train_time:247563ms step_avg:151.79ms step:1642/3200 train_loss:3.5043 train_time:247711ms step_avg:151.78ms step:1643/3200 train_loss:3.5912 train_time:247864ms step_avg:151.78ms step:1644/3200 train_loss:3.4910 train_time:248012ms step_avg:151.78ms step:1645/3200 train_loss:3.5088 train_time:248164ms step_avg:151.78ms step:1646/3200 train_loss:3.5016 train_time:248314ms step_avg:151.78ms step:1647/3200 train_loss:3.2827 train_time:248466ms step_avg:151.78ms step:1648/3200 train_loss:3.5472 train_time:248616ms step_avg:151.78ms step:1649/3200 train_loss:3.4140 train_time:248766ms step_avg:151.78ms step:1650/3200 train_loss:3.4846 train_time:248916ms step_avg:151.78ms step:1651/3200 train_loss:3.4602 train_time:249065ms step_avg:151.78ms step:1652/3200 train_loss:3.5288 train_time:249216ms step_avg:151.78ms step:1653/3200 train_loss:3.4577 train_time:249367ms step_avg:151.78ms step:1654/3200 train_loss:3.5837 train_time:249518ms step_avg:151.77ms step:1655/3200 train_loss:3.5747 train_time:249667ms step_avg:151.77ms step:1656/3200 
train_loss:3.4001 train_time:249818ms step_avg:151.77ms step:1657/3200 train_loss:3.5549 train_time:249967ms step_avg:151.77ms step:1658/3200 train_loss:3.4501 train_time:250117ms step_avg:151.77ms step:1659/3200 train_loss:3.4301 train_time:250267ms step_avg:151.77ms step:1660/3200 train_loss:3.5179 train_time:250417ms step_avg:151.77ms step:1661/3200 train_loss:3.5390 train_time:250567ms step_avg:151.77ms step:1662/3200 train_loss:3.4558 train_time:250716ms step_avg:151.77ms step:1663/3200 train_loss:3.5525 train_time:250867ms step_avg:151.76ms step:1664/3200 train_loss:3.5532 train_time:251017ms step_avg:151.76ms step:1665/3200 train_loss:3.5889 train_time:251167ms step_avg:151.76ms step:1666/3200 train_loss:3.5536 train_time:251318ms step_avg:151.76ms step:1667/3200 train_loss:3.7079 train_time:251467ms step_avg:151.76ms step:1668/3200 train_loss:3.4092 train_time:251619ms step_avg:151.76ms step:1669/3200 train_loss:3.4894 train_time:251768ms step_avg:151.76ms step:1670/3200 train_loss:3.4146 train_time:251919ms step_avg:151.76ms step:1671/3200 train_loss:3.4227 train_time:252069ms step_avg:151.76ms step:1672/3200 train_loss:3.5715 train_time:252220ms step_avg:151.76ms step:1673/3200 train_loss:3.7709 train_time:252370ms step_avg:151.76ms step:1674/3200 train_loss:3.4739 train_time:252521ms step_avg:151.76ms step:1675/3200 train_loss:3.4602 train_time:252670ms step_avg:151.75ms step:1676/3200 train_loss:3.3463 train_time:252820ms step_avg:151.75ms step:1677/3200 train_loss:3.5537 train_time:252969ms step_avg:151.75ms step:1678/3200 train_loss:3.4599 train_time:253120ms step_avg:151.75ms step:1679/3200 train_loss:3.4958 train_time:253270ms step_avg:151.75ms step:1680/3200 train_loss:3.4855 train_time:253421ms step_avg:151.75ms step:1681/3200 train_loss:3.2919 train_time:253571ms step_avg:151.75ms step:1682/3200 train_loss:3.4887 train_time:253724ms step_avg:151.75ms step:1683/3200 train_loss:3.5035 train_time:253872ms step_avg:151.75ms step:1684/3200 
train_loss:3.5513 train_time:254025ms step_avg:151.75ms step:1685/3200 train_loss:3.5410 train_time:254174ms step_avg:151.75ms step:1686/3200 train_loss:3.4500 train_time:254324ms step_avg:151.74ms step:1687/3200 train_loss:3.5560 train_time:254475ms step_avg:151.74ms step:1688/3200 train_loss:3.4403 train_time:254625ms step_avg:151.74ms step:1689/3200 train_loss:3.5190 train_time:254775ms step_avg:151.74ms step:1690/3200 train_loss:3.4363 train_time:254926ms step_avg:151.74ms step:1691/3200 train_loss:3.3385 train_time:255077ms step_avg:151.74ms step:1692/3200 train_loss:3.4914 train_time:255226ms step_avg:151.74ms step:1693/3200 train_loss:3.4838 train_time:255375ms step_avg:151.74ms step:1694/3200 train_loss:3.4019 train_time:255525ms step_avg:151.74ms step:1695/3200 train_loss:3.8442 train_time:255675ms step_avg:151.74ms step:1696/3200 train_loss:3.5596 train_time:255825ms step_avg:151.73ms step:1697/3200 train_loss:3.5455 train_time:255974ms step_avg:151.73ms step:1698/3200 train_loss:3.4475 train_time:256127ms step_avg:151.73ms step:1699/3200 train_loss:3.3507 train_time:256277ms step_avg:151.73ms step:1700/3200 train_loss:3.4428 train_time:256427ms step_avg:151.73ms step:1701/3200 train_loss:3.4479 train_time:256730ms step_avg:151.82ms step:1702/3200 train_loss:3.5198 train_time:256888ms step_avg:151.83ms step:1703/3200 train_loss:3.4407 train_time:257038ms step_avg:151.82ms step:1704/3200 train_loss:3.6480 train_time:257186ms step_avg:151.82ms step:1705/3200 train_loss:3.4065 train_time:257336ms step_avg:151.82ms step:1706/3200 train_loss:3.6332 train_time:257485ms step_avg:151.82ms step:1707/3200 train_loss:3.4737 train_time:257636ms step_avg:151.82ms step:1708/3200 train_loss:3.2582 train_time:257789ms step_avg:151.82ms step:1709/3200 train_loss:3.5909 train_time:257940ms step_avg:151.82ms step:1710/3200 train_loss:3.4980 train_time:258256ms step_avg:151.92ms step:1711/3200 train_loss:3.4843 train_time:258403ms step_avg:151.91ms step:1712/3200 
train_loss:3.4846 train_time:258552ms step_avg:151.91ms step:1713/3200 train_loss:3.5166 train_time:258700ms step_avg:151.91ms step:1714/3200 train_loss:3.5445 train_time:258848ms step_avg:151.91ms step:1715/3200 train_loss:3.4670 train_time:258996ms step_avg:151.90ms step:1716/3200 train_loss:3.4792 train_time:259148ms step_avg:151.90ms step:1717/3200 train_loss:3.3087 train_time:259303ms step_avg:151.91ms step:1718/3200 train_loss:3.4496 train_time:259453ms step_avg:151.90ms step:1719/3200 train_loss:3.4669 train_time:259603ms step_avg:151.90ms step:1720/3200 train_loss:3.4213 train_time:259752ms step_avg:151.90ms step:1721/3200 train_loss:3.5743 train_time:259903ms step_avg:151.90ms step:1722/3200 train_loss:3.3770 train_time:260052ms step_avg:151.90ms step:1723/3200 train_loss:3.5198 train_time:260205ms step_avg:151.90ms step:1724/3200 train_loss:3.6042 train_time:260357ms step_avg:151.90ms step:1725/3200 train_loss:3.4560 train_time:260508ms step_avg:151.90ms step:1726/3200 train_loss:3.6788 train_time:260659ms step_avg:151.90ms step:1727/3200 train_loss:3.4721 train_time:260807ms step_avg:151.90ms step:1728/3200 train_loss:3.5301 train_time:260957ms step_avg:151.90ms step:1729/3200 train_loss:3.4999 train_time:261107ms step_avg:151.89ms step:1730/3200 train_loss:3.5144 train_time:261259ms step_avg:151.89ms step:1731/3200 train_loss:3.8734 train_time:261408ms step_avg:151.89ms step:1732/3200 train_loss:3.4966 train_time:261560ms step_avg:151.89ms step:1733/3200 train_loss:3.6200 train_time:261709ms step_avg:151.89ms step:1734/3200 train_loss:3.4044 train_time:261861ms step_avg:151.89ms step:1735/3200 train_loss:3.4452 train_time:262009ms step_avg:151.89ms step:1736/3200 train_loss:3.4642 train_time:262161ms step_avg:151.89ms step:1737/3200 train_loss:3.4459 train_time:262310ms step_avg:151.89ms step:1738/3200 train_loss:3.5869 train_time:262463ms step_avg:151.89ms step:1739/3200 train_loss:3.4534 train_time:262612ms step_avg:151.89ms step:1740/3200 
train_loss:3.5080 train_time:262765ms step_avg:151.89ms step:1741/3200 train_loss:3.5722 train_time:262914ms step_avg:151.89ms step:1742/3200 train_loss:3.3693 train_time:263065ms step_avg:151.89ms step:1743/3200 train_loss:3.2626 train_time:263215ms step_avg:151.88ms step:1744/3200 train_loss:3.1995 train_time:263365ms step_avg:151.88ms step:1745/3200 train_loss:3.4897 train_time:263516ms step_avg:151.88ms step:1746/3200 train_loss:3.5020 train_time:263666ms step_avg:151.88ms step:1747/3200 train_loss:3.4694 train_time:263815ms step_avg:151.88ms step:1748/3200 train_loss:3.4860 train_time:263966ms step_avg:151.88ms step:1749/3200 train_loss:3.7223 train_time:264116ms step_avg:151.88ms step:1750/3200 train_loss:3.4307 train_time:264266ms step_avg:151.88ms step:1750/3200 val_loss:3.4857 train_time:264312ms step_avg:151.90ms step:1751/3200 train_loss:3.5051 train_time:264422ms step_avg:151.88ms step:1752/3200 train_loss:3.4953 train_time:264575ms step_avg:151.88ms step:1753/3200 train_loss:3.1367 train_time:264724ms step_avg:151.88ms step:1754/3200 train_loss:3.2532 train_time:264874ms step_avg:151.88ms step:1755/3200 train_loss:3.3601 train_time:265020ms step_avg:151.87ms step:1756/3200 train_loss:3.3014 train_time:265171ms step_avg:151.87ms step:1757/3200 train_loss:3.4562 train_time:265321ms step_avg:151.87ms step:1758/3200 train_loss:3.3419 train_time:265475ms step_avg:151.87ms step:1759/3200 train_loss:3.3361 train_time:265626ms step_avg:151.87ms step:1760/3200 train_loss:4.3973 train_time:265775ms step_avg:151.87ms step:1761/3200 train_loss:3.4714 train_time:265925ms step_avg:151.87ms step:1762/3200 train_loss:3.5087 train_time:266074ms step_avg:151.87ms step:1763/3200 train_loss:3.4982 train_time:266222ms step_avg:151.87ms step:1764/3200 train_loss:3.5220 train_time:266377ms step_avg:151.87ms step:1765/3200 train_loss:3.4374 train_time:266527ms step_avg:151.87ms step:1766/3200 train_loss:3.4815 train_time:266677ms step_avg:151.87ms step:1767/3200 
train_loss:3.4900 train_time:266827ms step_avg:151.87ms step:1768/3200 train_loss:3.7408 train_time:266976ms step_avg:151.86ms step:1769/3200 train_loss:3.4699 train_time:267127ms step_avg:151.86ms step:1770/3200 train_loss:3.5381 train_time:267277ms step_avg:151.86ms step:1771/3200 train_loss:3.9141 train_time:267432ms step_avg:151.86ms step:1772/3200 train_loss:3.4719 train_time:267582ms step_avg:151.86ms step:1773/3200 train_loss:3.3735 train_time:267734ms step_avg:151.86ms step:1774/3200 train_loss:3.6300 train_time:267883ms step_avg:151.86ms step:1775/3200 train_loss:3.3868 train_time:268034ms step_avg:151.86ms step:1776/3200 train_loss:3.5334 train_time:268184ms step_avg:151.86ms step:1777/3200 train_loss:3.5868 train_time:268335ms step_avg:151.86ms step:1778/3200 train_loss:3.6735 train_time:268485ms step_avg:151.86ms step:1779/3200 train_loss:3.4791 train_time:268636ms step_avg:151.86ms step:1780/3200 train_loss:3.7746 train_time:268787ms step_avg:151.86ms step:1781/3200 train_loss:3.5490 train_time:268935ms step_avg:151.85ms step:1782/3200 train_loss:3.5662 train_time:269084ms step_avg:151.85ms step:1783/3200 train_loss:3.3519 train_time:269236ms step_avg:151.85ms step:1784/3200 train_loss:3.4369 train_time:269386ms step_avg:151.85ms step:1785/3200 train_loss:3.5846 train_time:269538ms step_avg:151.85ms step:1786/3200 train_loss:3.4683 train_time:269689ms step_avg:151.85ms step:1787/3200 train_loss:3.6329 train_time:269839ms step_avg:151.85ms step:1788/3200 train_loss:3.4485 train_time:269990ms step_avg:151.85ms step:1789/3200 train_loss:3.4287 train_time:270138ms step_avg:151.85ms step:1790/3200 train_loss:3.5699 train_time:270289ms step_avg:151.85ms step:1791/3200 train_loss:3.4719 train_time:270439ms step_avg:151.85ms step:1792/3200 train_loss:3.4161 train_time:270591ms step_avg:151.85ms step:1793/3200 train_loss:3.5517 train_time:270740ms step_avg:151.85ms step:1794/3200 train_loss:3.4250 train_time:270892ms step_avg:151.85ms step:1795/3200 
train_loss:3.4148 train_time:271040ms step_avg:151.84ms step:1796/3200 train_loss:3.4795 train_time:271192ms step_avg:151.84ms step:1797/3200 train_loss:3.4438 train_time:271341ms step_avg:151.84ms step:1798/3200 train_loss:3.5770 train_time:271494ms step_avg:151.84ms step:1799/3200 train_loss:3.4627 train_time:271644ms step_avg:151.84ms step:1800/3200 train_loss:3.5423 train_time:271796ms step_avg:151.84ms step:1801/3200 train_loss:3.4674 train_time:271946ms step_avg:151.84ms step:1802/3200 train_loss:3.5070 train_time:272096ms step_avg:151.84ms step:1803/3200 train_loss:3.4093 train_time:272247ms step_avg:151.84ms step:1804/3200 train_loss:3.3472 train_time:272396ms step_avg:151.84ms step:1805/3200 train_loss:3.5965 train_time:272547ms step_avg:151.84ms step:1806/3200 train_loss:3.5166 train_time:272697ms step_avg:151.84ms step:1807/3200 train_loss:3.5307 train_time:272848ms step_avg:151.84ms step:1808/3200 train_loss:3.6322 train_time:272997ms step_avg:151.83ms step:1809/3200 train_loss:3.4331 train_time:273148ms step_avg:151.83ms step:1810/3200 train_loss:3.5332 train_time:273298ms step_avg:151.83ms step:1811/3200 train_loss:3.6666 train_time:273449ms step_avg:151.83ms step:1812/3200 train_loss:3.5211 train_time:273598ms step_avg:151.83ms step:1813/3200 train_loss:3.5695 train_time:273749ms step_avg:151.83ms step:1814/3200 train_loss:3.5865 train_time:273899ms step_avg:151.83ms step:1815/3200 train_loss:3.5342 train_time:274048ms step_avg:151.83ms step:1816/3200 train_loss:3.5632 train_time:274198ms step_avg:151.83ms step:1817/3200 train_loss:3.5226 train_time:274347ms step_avg:151.82ms step:1818/3200 train_loss:3.5759 train_time:274498ms step_avg:151.82ms step:1819/3200 train_loss:3.4972 train_time:274648ms step_avg:151.82ms step:1820/3200 train_loss:3.4896 train_time:274797ms step_avg:151.82ms step:1821/3200 train_loss:3.4468 train_time:274948ms step_avg:151.82ms step:1822/3200 train_loss:3.4219 train_time:275097ms step_avg:151.82ms step:1823/3200 
train_loss:3.3519 train_time:275248ms step_avg:151.82ms step:1824/3200 train_loss:3.5059 train_time:275398ms step_avg:151.82ms step:1825/3200 train_loss:3.6227 train_time:275549ms step_avg:151.82ms step:1826/3200 train_loss:3.5830 train_time:275699ms step_avg:151.82ms step:1827/3200 train_loss:3.5625 train_time:275849ms step_avg:151.82ms step:1828/3200 train_loss:3.4272 train_time:275998ms step_avg:151.81ms step:1829/3200 train_loss:3.4543 train_time:276148ms step_avg:151.81ms step:1830/3200 train_loss:3.5886 train_time:276298ms step_avg:151.81ms step:1831/3200 train_loss:3.3620 train_time:276449ms step_avg:151.81ms step:1832/3200 train_loss:3.5131 train_time:276598ms step_avg:151.81ms step:1833/3200 train_loss:3.3966 train_time:276750ms step_avg:151.81ms step:1834/3200 train_loss:3.7124 train_time:276899ms step_avg:151.81ms step:1835/3200 train_loss:3.5472 train_time:277052ms step_avg:151.81ms step:1836/3200 train_loss:3.5287 train_time:277201ms step_avg:151.81ms step:1837/3200 train_loss:3.6568 train_time:277353ms step_avg:151.81ms step:1838/3200 train_loss:3.5111 train_time:277502ms step_avg:151.81ms step:1839/3200 train_loss:3.3951 train_time:277654ms step_avg:151.81ms step:1840/3200 train_loss:3.5077 train_time:277803ms step_avg:151.81ms step:1841/3200 train_loss:3.3997 train_time:277955ms step_avg:151.80ms step:1842/3200 train_loss:3.5034 train_time:278103ms step_avg:151.80ms step:1843/3200 train_loss:3.5615 train_time:278255ms step_avg:151.80ms step:1844/3200 train_loss:3.3130 train_time:278404ms step_avg:151.80ms step:1845/3200 train_loss:3.4339 train_time:278556ms step_avg:151.80ms step:1846/3200 train_loss:3.5004 train_time:278707ms step_avg:151.80ms step:1847/3200 train_loss:3.4340 train_time:278858ms step_avg:151.80ms step:1848/3200 train_loss:3.3385 train_time:279008ms step_avg:151.80ms step:1849/3200 train_loss:3.6032 train_time:279157ms step_avg:151.80ms step:1850/3200 train_loss:3.3761 train_time:279309ms step_avg:151.80ms step:1851/3200 
train_loss:3.4568 train_time:279459ms step_avg:151.80ms step:1852/3200 train_loss:3.4128 train_time:279611ms step_avg:151.80ms step:1853/3200 train_loss:3.6100 train_time:279761ms step_avg:151.80ms step:1854/3200 train_loss:3.5837 train_time:279912ms step_avg:151.80ms step:1855/3200 train_loss:3.4668 train_time:280061ms step_avg:151.79ms step:1856/3200 train_loss:3.4217 train_time:280213ms step_avg:151.79ms step:1857/3200 train_loss:3.4425 train_time:280364ms step_avg:151.79ms step:1858/3200 train_loss:3.6968 train_time:280606ms step_avg:151.84ms step:1859/3200 train_loss:3.5402 train_time:280710ms step_avg:151.82ms step:1860/3200 train_loss:3.4739 train_time:280857ms step_avg:151.81ms step:1861/3200 train_loss:3.5148 train_time:281007ms step_avg:151.81ms step:1862/3200 train_loss:3.4071 train_time:281155ms step_avg:151.81ms step:1863/3200 train_loss:3.4044 train_time:281303ms step_avg:151.81ms step:1864/3200 train_loss:3.4722 train_time:281454ms step_avg:151.81ms step:1865/3200 train_loss:3.5118 train_time:281606ms step_avg:151.81ms step:1866/3200 train_loss:3.2766 train_time:281758ms step_avg:151.81ms step:1867/3200 train_loss:3.4130 train_time:281909ms step_avg:151.81ms step:1868/3200 train_loss:3.3627 train_time:282057ms step_avg:151.81ms step:1869/3200 train_loss:3.3677 train_time:282206ms step_avg:151.81ms step:1870/3200 train_loss:3.5234 train_time:282355ms step_avg:151.80ms step:1871/3200 train_loss:3.5068 train_time:282504ms step_avg:151.80ms step:1872/3200 train_loss:3.4511 train_time:282656ms step_avg:151.80ms step:1873/3200 train_loss:3.4608 train_time:282808ms step_avg:151.80ms step:1874/3200 train_loss:3.4014 train_time:282958ms step_avg:151.80ms step:1875/3200 train_loss:3.5006 train_time:283108ms step_avg:151.80ms step:1875/3200 val_loss:3.4728 train_time:283153ms step_avg:151.82ms step:1876/3200 train_loss:3.4969 train_time:283265ms step_avg:151.80ms step:1877/3200 train_loss:3.4248 train_time:283417ms step_avg:151.80ms step:1878/3200 
train_loss:3.4677 train_time:283565ms step_avg:151.80ms step:1879/3200 train_loss:3.5759 train_time:283714ms step_avg:151.80ms step:1880/3200 train_loss:3.4563 train_time:283862ms step_avg:151.80ms step:1881/3200 train_loss:3.5101 train_time:284011ms step_avg:151.80ms step:1882/3200 train_loss:3.4255 train_time:284161ms step_avg:151.80ms step:1883/3200 train_loss:3.4930 train_time:284314ms step_avg:151.80ms step:1884/3200 train_loss:3.4955 train_time:284467ms step_avg:151.80ms step:1885/3200 train_loss:3.2519 train_time:284620ms step_avg:151.80ms step:1886/3200 train_loss:3.6518 train_time:284768ms step_avg:151.80ms step:1887/3200 train_loss:3.3824 train_time:284920ms step_avg:151.80ms step:1888/3200 train_loss:3.3973 train_time:285068ms step_avg:151.79ms step:1889/3200 train_loss:3.4746 train_time:285220ms step_avg:151.79ms step:1890/3200 train_loss:3.5165 train_time:285523ms step_avg:151.87ms step:1891/3200 train_loss:3.3413 train_time:285682ms step_avg:151.88ms step:1892/3200 train_loss:3.6128 train_time:285831ms step_avg:151.88ms step:1893/3200 train_loss:3.3667 train_time:285981ms step_avg:151.88ms step:1894/3200 train_loss:3.5038 train_time:286129ms step_avg:151.87ms step:1895/3200 train_loss:3.5353 train_time:286279ms step_avg:151.87ms step:1896/3200 train_loss:3.3397 train_time:286429ms step_avg:151.87ms step:1897/3200 train_loss:3.5013 train_time:286583ms step_avg:151.87ms step:1898/3200 train_loss:3.4586 train_time:286735ms step_avg:151.87ms step:1899/3200 train_loss:3.5408 train_time:286884ms step_avg:151.87ms step:1900/3200 train_loss:3.3224 train_time:287206ms step_avg:151.96ms step:1901/3200 train_loss:3.5585 train_time:287357ms step_avg:151.96ms step:1902/3200 train_loss:3.4493 train_time:287505ms step_avg:151.96ms step:1903/3200 train_loss:3.6090 train_time:287653ms step_avg:151.96ms step:1904/3200 train_loss:3.4158 train_time:287801ms step_avg:151.95ms step:1905/3200 train_loss:3.6896 train_time:287949ms step_avg:151.95ms step:1906/3200 
train_loss:3.4232 train_time:288103ms step_avg:151.95ms step:1907/3200 train_loss:3.4171 train_time:288258ms step_avg:151.95ms step:1908/3200 train_loss:3.4977 train_time:288407ms step_avg:151.95ms step:1909/3200 train_loss:3.3734 train_time:288557ms step_avg:151.95ms step:1910/3200 train_loss:3.4419 train_time:288704ms step_avg:151.95ms step:1911/3200 train_loss:3.5371 train_time:288853ms step_avg:151.95ms step:1912/3200 train_loss:3.4592 train_time:289002ms step_avg:151.95ms step:1913/3200 train_loss:3.3401 train_time:289154ms step_avg:151.95ms step:1914/3200 train_loss:3.2217 train_time:289306ms step_avg:151.95ms step:1915/3200 train_loss:3.4147 train_time:289457ms step_avg:151.95ms step:1916/3200 train_loss:3.6341 train_time:289605ms step_avg:151.94ms step:1917/3200 train_loss:3.6253 train_time:289756ms step_avg:151.94ms step:1918/3200 train_loss:3.5834 train_time:289905ms step_avg:151.94ms step:1919/3200 train_loss:3.4013 train_time:290055ms step_avg:151.94ms step:1920/3200 train_loss:3.6540 train_time:290205ms step_avg:151.94ms step:1921/3200 train_loss:3.4704 train_time:290358ms step_avg:151.94ms step:1922/3200 train_loss:3.4070 train_time:290508ms step_avg:151.94ms step:1923/3200 train_loss:3.5819 train_time:290659ms step_avg:151.94ms step:1924/3200 train_loss:3.5478 train_time:290809ms step_avg:151.94ms step:1925/3200 train_loss:3.3879 train_time:290961ms step_avg:151.94ms step:1926/3200 train_loss:3.4162 train_time:291111ms step_avg:151.94ms step:1927/3200 train_loss:3.3285 train_time:291262ms step_avg:151.94ms step:1928/3200 train_loss:3.4409 train_time:291413ms step_avg:151.94ms step:1929/3200 train_loss:3.2965 train_time:291563ms step_avg:151.94ms step:1930/3200 train_loss:3.4118 train_time:291714ms step_avg:151.93ms step:1931/3200 train_loss:3.5442 train_time:291863ms step_avg:151.93ms step:1932/3200 train_loss:3.4146 train_time:292014ms step_avg:151.93ms step:1933/3200 train_loss:3.5585 train_time:292164ms step_avg:151.93ms step:1934/3200 
train_loss:3.4276 train_time:292315ms step_avg:151.93ms step:1935/3200 train_loss:3.4780 train_time:292464ms step_avg:151.93ms step:1936/3200 train_loss:3.5141 train_time:292615ms step_avg:151.93ms step:1937/3200 train_loss:3.4663 train_time:292765ms step_avg:151.93ms step:1938/3200 train_loss:3.4921 train_time:292916ms step_avg:151.93ms step:1939/3200 train_loss:3.4245 train_time:293065ms step_avg:151.93ms step:1940/3200 train_loss:3.5182 train_time:293217ms step_avg:151.93ms step:1941/3200 train_loss:3.5474 train_time:293367ms step_avg:151.92ms step:1942/3200 train_loss:3.3882 train_time:293519ms step_avg:151.93ms step:1943/3200 train_loss:3.4269 train_time:293668ms step_avg:151.92ms step:1944/3200 train_loss:3.4885 train_time:293821ms step_avg:151.92ms step:1945/3200 train_loss:3.3346 train_time:293970ms step_avg:151.92ms step:1946/3200 train_loss:3.6038 train_time:294122ms step_avg:151.92ms step:1947/3200 train_loss:3.4760 train_time:294272ms step_avg:151.92ms step:1948/3200 train_loss:3.4573 train_time:294422ms step_avg:151.92ms step:1949/3200 train_loss:3.4564 train_time:294571ms step_avg:151.92ms step:1950/3200 train_loss:3.3369 train_time:294722ms step_avg:151.92ms step:1951/3200 train_loss:3.4607 train_time:294872ms step_avg:151.92ms step:1952/3200 train_loss:3.3105 train_time:295022ms step_avg:151.92ms step:1953/3200 train_loss:3.5110 train_time:295173ms step_avg:151.92ms step:1954/3200 train_loss:3.5116 train_time:295323ms step_avg:151.92ms step:1955/3200 train_loss:3.4634 train_time:295473ms step_avg:151.91ms step:1956/3200 train_loss:3.3529 train_time:295623ms step_avg:151.91ms step:1957/3200 train_loss:3.4405 train_time:295773ms step_avg:151.91ms step:1958/3200 train_loss:3.6229 train_time:295924ms step_avg:151.91ms step:1959/3200 train_loss:3.5436 train_time:296075ms step_avg:151.91ms step:1960/3200 train_loss:3.5677 train_time:296225ms step_avg:151.91ms step:1961/3200 train_loss:3.3647 train_time:296376ms step_avg:151.91ms step:1962/3200 
train_loss:3.4926 train_time:296525ms step_avg:151.91ms step:1963/3200 train_loss:3.5309 train_time:296675ms step_avg:151.91ms step:1964/3200 train_loss:3.4765 train_time:296825ms step_avg:151.91ms step:1965/3200 train_loss:3.3882 train_time:296976ms step_avg:151.91ms step:1966/3200 train_loss:3.7926 train_time:297125ms step_avg:151.90ms step:1967/3200 train_loss:3.3999 train_time:297277ms step_avg:151.90ms step:1968/3200 train_loss:3.4493 train_time:297427ms step_avg:151.90ms step:1969/3200 train_loss:3.4961 train_time:297578ms step_avg:151.90ms step:1970/3200 train_loss:3.4535 train_time:297726ms step_avg:151.90ms step:1971/3200 train_loss:3.3443 train_time:297877ms step_avg:151.90ms step:1972/3200 train_loss:3.3298 train_time:298026ms step_avg:151.90ms step:1973/3200 train_loss:3.4480 train_time:298178ms step_avg:151.90ms step:1974/3200 train_loss:3.4114 train_time:298327ms step_avg:151.90ms step:1975/3200 train_loss:3.3948 train_time:298478ms step_avg:151.90ms step:1976/3200 train_loss:3.5536 train_time:298627ms step_avg:151.90ms step:1977/3200 train_loss:3.4162 train_time:298777ms step_avg:151.89ms step:1978/3200 train_loss:3.7799 train_time:298927ms step_avg:151.89ms step:1979/3200 train_loss:3.4635 train_time:299079ms step_avg:151.89ms step:1980/3200 train_loss:3.4652 train_time:299227ms step_avg:151.89ms step:1981/3200 train_loss:3.4721 train_time:299379ms step_avg:151.89ms step:1982/3200 train_loss:3.4992 train_time:299528ms step_avg:151.89ms step:1983/3200 train_loss:3.4264 train_time:299680ms step_avg:151.89ms step:1984/3200 train_loss:3.3925 train_time:299829ms step_avg:151.89ms step:1985/3200 train_loss:3.4518 train_time:299981ms step_avg:151.89ms step:1986/3200 train_loss:3.5094 train_time:300131ms step_avg:151.89ms step:1987/3200 train_loss:3.4854 train_time:300282ms step_avg:151.89ms step:1988/3200 train_loss:3.4584 train_time:300433ms step_avg:151.89ms step:1989/3200 train_loss:3.5371 train_time:300582ms step_avg:151.89ms step:1990/3200 
train_loss:3.5692 train_time:300732ms step_avg:151.89ms step:1991/3200 train_loss:3.3502 train_time:300884ms step_avg:151.88ms step:1992/3200 train_loss:3.3421 train_time:301035ms step_avg:151.88ms step:1993/3200 train_loss:3.5267 train_time:301184ms step_avg:151.88ms step:1994/3200 train_loss:3.3547 train_time:301334ms step_avg:151.88ms step:1995/3200 train_loss:3.4375 train_time:301484ms step_avg:151.88ms step:1996/3200 train_loss:3.5174 train_time:301634ms step_avg:151.88ms step:1997/3200 train_loss:3.3758 train_time:301784ms step_avg:151.88ms step:1998/3200 train_loss:3.4851 train_time:301935ms step_avg:151.88ms step:1999/3200 train_loss:3.4806 train_time:302085ms step_avg:151.88ms step:2000/3200 train_loss:3.4037 train_time:302236ms step_avg:151.88ms step:2000/3200 val_loss:3.4585 train_time:302282ms step_avg:151.90ms step:2001/3200 train_loss:3.5508 train_time:302393ms step_avg:151.88ms step:2002/3200 train_loss:3.4878 train_time:302545ms step_avg:151.88ms step:2003/3200 train_loss:3.5803 train_time:302693ms step_avg:151.88ms step:2004/3200 train_loss:3.4979 train_time:302842ms step_avg:151.88ms step:2005/3200 train_loss:3.5054 train_time:302990ms step_avg:151.87ms step:2006/3200 train_loss:3.3987 train_time:303140ms step_avg:151.87ms step:2007/3200 train_loss:3.4258 train_time:303289ms step_avg:151.87ms step:2008/3200 train_loss:3.4696 train_time:303445ms step_avg:151.87ms step:2009/3200 train_loss:3.5114 train_time:303595ms step_avg:151.87ms step:2010/3200 train_loss:3.4137 train_time:303747ms step_avg:151.87ms step:2011/3200 train_loss:3.4933 train_time:303895ms step_avg:151.87ms step:2012/3200 train_loss:3.4657 train_time:304044ms step_avg:151.87ms step:2013/3200 train_loss:3.4681 train_time:304192ms step_avg:151.87ms step:2014/3200 train_loss:3.3929 train_time:304344ms step_avg:151.87ms step:2015/3200 train_loss:3.4328 train_time:304495ms step_avg:151.87ms step:2016/3200 train_loss:3.4502 train_time:304647ms step_avg:151.87ms step:2017/3200 
train_loss:3.5785 train_time:304797ms step_avg:151.87ms step:2018/3200 train_loss:3.4334 train_time:304948ms step_avg:151.87ms step:2019/3200 train_loss:3.5799 train_time:305097ms step_avg:151.86ms step:2020/3200 train_loss:3.5932 train_time:305249ms step_avg:151.87ms step:2021/3200 train_loss:3.2999 train_time:305399ms step_avg:151.86ms step:2022/3200 train_loss:3.5336 train_time:305550ms step_avg:151.86ms step:2023/3200 train_loss:3.4622 train_time:305701ms step_avg:151.86ms step:2024/3200 train_loss:3.5582 train_time:305852ms step_avg:151.86ms step:2025/3200 train_loss:3.5957 train_time:306001ms step_avg:151.86ms step:2026/3200 train_loss:3.3812 train_time:306151ms step_avg:151.86ms step:2027/3200 train_loss:3.4206 train_time:306300ms step_avg:151.86ms step:2028/3200 train_loss:3.3242 train_time:306451ms step_avg:151.86ms step:2029/3200 train_loss:3.4356 train_time:306602ms step_avg:151.86ms step:2030/3200 train_loss:3.3576 train_time:306752ms step_avg:151.86ms step:2031/3200 train_loss:3.4494 train_time:306904ms step_avg:151.86ms step:2032/3200 train_loss:3.4459 train_time:307053ms step_avg:151.86ms step:2033/3200 train_loss:3.4600 train_time:307204ms step_avg:151.86ms step:2034/3200 train_loss:3.3555 train_time:307353ms step_avg:151.85ms step:2035/3200 train_loss:3.5178 train_time:307505ms step_avg:151.85ms step:2036/3200 train_loss:3.5176 train_time:307654ms step_avg:151.85ms step:2037/3200 train_loss:3.5047 train_time:307806ms step_avg:151.85ms step:2038/3200 train_loss:3.3752 train_time:307956ms step_avg:151.85ms step:2039/3200 train_loss:3.6342 train_time:308108ms step_avg:151.85ms step:2040/3200 train_loss:3.4688 train_time:308258ms step_avg:151.85ms step:2041/3200 train_loss:3.4905 train_time:308409ms step_avg:151.85ms step:2042/3200 train_loss:3.4449 train_time:308559ms step_avg:151.85ms step:2043/3200 train_loss:3.3381 train_time:308710ms step_avg:151.85ms step:2044/3200 train_loss:3.4622 train_time:308859ms step_avg:151.85ms step:2045/3200 
train_loss:3.4516 train_time:309010ms step_avg:151.85ms step:2046/3200 train_loss:3.3228 train_time:309159ms step_avg:151.85ms step:2047/3200 train_loss:3.3991 train_time:309310ms step_avg:151.85ms step:2048/3200 train_loss:3.4771 train_time:309460ms step_avg:151.84ms step:2049/3200 train_loss:3.4228 train_time:309610ms step_avg:151.84ms step:2050/3200 train_loss:3.4755 train_time:309759ms step_avg:151.84ms step:2051/3200 train_loss:3.6174 train_time:309909ms step_avg:151.84ms step:2052/3200 train_loss:3.4824 train_time:310059ms step_avg:151.84ms step:2053/3200 train_loss:3.4370 train_time:310210ms step_avg:151.84ms step:2054/3200 train_loss:3.4160 train_time:310359ms step_avg:151.84ms step:2055/3200 train_loss:3.2884 train_time:310510ms step_avg:151.84ms step:2056/3200 train_loss:3.3924 train_time:310660ms step_avg:151.84ms step:2057/3200 train_loss:3.5710 train_time:310809ms step_avg:151.84ms step:2058/3200 train_loss:3.5914 train_time:310959ms step_avg:151.84ms step:2059/3200 train_loss:3.4521 train_time:311111ms step_avg:151.84ms step:2060/3200 train_loss:3.4969 train_time:311261ms step_avg:151.83ms step:2061/3200 train_loss:3.4843 train_time:311410ms step_avg:151.83ms step:2062/3200 train_loss:3.4320 train_time:311560ms step_avg:151.83ms step:2063/3200 train_loss:3.3500 train_time:311711ms step_avg:151.83ms step:2064/3200 train_loss:3.6588 train_time:311862ms step_avg:151.83ms step:2065/3200 train_loss:3.5148 train_time:312011ms step_avg:151.83ms step:2066/3200 train_loss:3.4696 train_time:312163ms step_avg:151.83ms step:2067/3200 train_loss:3.5055 train_time:312313ms step_avg:151.83ms step:2068/3200 train_loss:3.4082 train_time:312465ms step_avg:151.83ms step:2069/3200 train_loss:3.4620 train_time:312614ms step_avg:151.83ms step:2070/3200 train_loss:3.6008 train_time:312764ms step_avg:151.83ms step:2071/3200 train_loss:3.6060 train_time:312914ms step_avg:151.83ms step:2072/3200 train_loss:3.4515 train_time:313066ms step_avg:151.83ms step:2073/3200 
train_loss:3.4868 train_time:313215ms step_avg:151.82ms step:2074/3200 train_loss:3.3743 train_time:313367ms step_avg:151.83ms step:2075/3200 train_loss:3.9047 train_time:313518ms step_avg:151.82ms step:2076/3200 train_loss:3.3279 train_time:313669ms step_avg:151.82ms step:2077/3200 train_loss:3.4981 train_time:313818ms step_avg:151.82ms step:2078/3200 train_loss:3.3846 train_time:313969ms step_avg:151.82ms step:2079/3200 train_loss:3.3690 train_time:314278ms step_avg:151.90ms step:2080/3200 train_loss:3.4572 train_time:314433ms step_avg:151.90ms step:2081/3200 train_loss:3.7131 train_time:314582ms step_avg:151.90ms step:2082/3200 train_loss:3.3423 train_time:314731ms step_avg:151.90ms step:2083/3200 train_loss:3.6794 train_time:314880ms step_avg:151.90ms step:2084/3200 train_loss:3.3788 train_time:315028ms step_avg:151.89ms step:2085/3200 train_loss:3.3649 train_time:315178ms step_avg:151.89ms step:2086/3200 train_loss:3.6059 train_time:315333ms step_avg:151.89ms step:2087/3200 train_loss:3.5342 train_time:315484ms step_avg:151.89ms step:2088/3200 train_loss:3.5204 train_time:315633ms step_avg:151.89ms step:2089/3200 train_loss:3.5812 train_time:315783ms step_avg:151.89ms step:2090/3200 train_loss:3.5049 train_time:316099ms step_avg:151.97ms step:2091/3200 train_loss:3.4959 train_time:316247ms step_avg:151.97ms step:2092/3200 train_loss:3.4437 train_time:316395ms step_avg:151.97ms step:2093/3200 train_loss:3.5149 train_time:316546ms step_avg:151.97ms step:2094/3200 train_loss:3.4254 train_time:316693ms step_avg:151.96ms step:2095/3200 train_loss:3.2103 train_time:316843ms step_avg:151.96ms step:2096/3200 train_loss:3.4343 train_time:316992ms step_avg:151.96ms step:2097/3200 train_loss:3.6115 train_time:317147ms step_avg:151.96ms step:2098/3200 train_loss:3.4316 train_time:317297ms step_avg:151.96ms step:2099/3200 train_loss:3.3293 train_time:317448ms step_avg:151.96ms step:2100/3200 train_loss:3.4290 train_time:317597ms step_avg:151.96ms step:2101/3200 
train_loss:3.3887 train_time:317748ms step_avg:151.96ms step:2102/3200 train_loss:3.5290 train_time:317897ms step_avg:151.96ms step:2103/3200 train_loss:3.3665 train_time:318049ms step_avg:151.96ms step:2104/3200 train_loss:3.3405 train_time:318201ms step_avg:151.96ms step:2105/3200 train_loss:3.5908 train_time:318354ms step_avg:151.96ms step:2106/3200 train_loss:3.3183 train_time:318506ms step_avg:151.96ms step:2107/3200 train_loss:3.7217 train_time:318654ms step_avg:151.96ms step:2108/3200 train_loss:3.5518 train_time:318805ms step_avg:151.96ms step:2109/3200 train_loss:3.4560 train_time:318953ms step_avg:151.95ms step:2110/3200 train_loss:3.4773 train_time:319106ms step_avg:151.96ms step:2111/3200 train_loss:3.2985 train_time:319257ms step_avg:151.95ms step:2112/3200 train_loss:3.7762 train_time:319409ms step_avg:151.95ms step:2113/3200 train_loss:3.4789 train_time:319558ms step_avg:151.95ms step:2114/3200 train_loss:3.4032 train_time:319710ms step_avg:151.95ms step:2115/3200 train_loss:3.5183 train_time:319859ms step_avg:151.95ms step:2116/3200 train_loss:3.4694 train_time:320009ms step_avg:151.95ms step:2117/3200 train_loss:3.4655 train_time:320158ms step_avg:151.95ms step:2118/3200 train_loss:3.5171 train_time:320310ms step_avg:151.95ms step:2119/3200 train_loss:3.3721 train_time:320460ms step_avg:151.95ms step:2120/3200 train_loss:3.4387 train_time:320612ms step_avg:151.95ms step:2121/3200 train_loss:3.1348 train_time:320763ms step_avg:151.95ms step:2122/3200 train_loss:3.3363 train_time:320913ms step_avg:151.95ms step:2123/3200 train_loss:3.5025 train_time:321064ms step_avg:151.95ms step:2124/3200 train_loss:3.4112 train_time:321213ms step_avg:151.95ms step:2125/3200 train_loss:3.5785 train_time:321364ms step_avg:151.95ms step:2125/3200 val_loss:3.4473 train_time:321410ms step_avg:151.97ms step:2126/3200 train_loss:3.4367 train_time:321521ms step_avg:151.95ms step:2127/3200 train_loss:3.5464 train_time:321672ms step_avg:151.95ms step:2128/3200 
train_loss:3.5258 train_time:321820ms step_avg:151.95ms step:2129/3200 train_loss:3.3920 train_time:321969ms step_avg:151.94ms step:2130/3200 train_loss:3.3803 train_time:322116ms step_avg:151.94ms step:2131/3200 train_loss:3.4059 train_time:322264ms step_avg:151.94ms step:2132/3200 train_loss:3.5549 train_time:322416ms step_avg:151.94ms step:2133/3200 train_loss:3.4332 train_time:322570ms step_avg:151.94ms step:2134/3200 train_loss:3.3311 train_time:322722ms step_avg:151.94ms step:2135/3200 train_loss:3.4029 train_time:322874ms step_avg:151.94ms step:2136/3200 train_loss:3.5267 train_time:323022ms step_avg:151.94ms step:2137/3200 train_loss:3.5442 train_time:323173ms step_avg:151.94ms step:2138/3200 train_loss:3.4856 train_time:323322ms step_avg:151.94ms step:2139/3200 train_loss:3.4738 train_time:323475ms step_avg:151.94ms step:2140/3200 train_loss:3.4625 train_time:323626ms step_avg:151.94ms step:2141/3200 train_loss:3.5419 train_time:323777ms step_avg:151.94ms step:2142/3200 train_loss:3.8437 train_time:323927ms step_avg:151.94ms step:2143/3200 train_loss:3.3683 train_time:324077ms step_avg:151.93ms step:2144/3200 train_loss:3.4073 train_time:324226ms step_avg:151.93ms step:2145/3200 train_loss:3.4538 train_time:324376ms step_avg:151.93ms step:2146/3200 train_loss:3.5843 train_time:324526ms step_avg:151.93ms step:2147/3200 train_loss:3.5055 train_time:324678ms step_avg:151.93ms step:2148/3200 train_loss:3.9159 train_time:324828ms step_avg:151.93ms step:2149/3200 train_loss:3.4337 train_time:324977ms step_avg:151.93ms step:2150/3200 train_loss:3.3963 train_time:325126ms step_avg:151.93ms step:2151/3200 train_loss:3.4751 train_time:325276ms step_avg:151.93ms step:2152/3200 train_loss:3.5007 train_time:325425ms step_avg:151.93ms step:2153/3200 train_loss:3.4581 train_time:325577ms step_avg:151.93ms step:2154/3200 train_loss:3.3934 train_time:325728ms step_avg:151.93ms step:2155/3200 train_loss:3.6011 train_time:325880ms step_avg:151.93ms step:2156/3200 
train_loss:3.2271 train_time:326030ms step_avg:151.92ms step:2157/3200 train_loss:3.3841 train_time:326179ms step_avg:151.92ms step:2158/3200 train_loss:3.5157 train_time:326329ms step_avg:151.92ms step:2159/3200 train_loss:3.4591 train_time:326479ms step_avg:151.92ms step:2160/3200 train_loss:3.6204 train_time:326630ms step_avg:151.92ms step:2161/3200 train_loss:3.5231 train_time:326780ms step_avg:151.92ms step:2162/3200 train_loss:3.4563 train_time:326932ms step_avg:151.92ms step:2163/3200 train_loss:3.4253 train_time:327080ms step_avg:151.92ms step:2164/3200 train_loss:3.4184 train_time:327229ms step_avg:151.92ms step:2165/3200 train_loss:3.5061 train_time:327379ms step_avg:151.92ms step:2166/3200 train_loss:3.5320 train_time:327529ms step_avg:151.92ms step:2167/3200 train_loss:3.4616 train_time:327679ms step_avg:151.91ms step:2168/3200 train_loss:3.3556 train_time:327830ms step_avg:151.91ms step:2169/3200 train_loss:3.4449 train_time:327980ms step_avg:151.91ms step:2170/3200 train_loss:3.4768 train_time:328132ms step_avg:151.91ms step:2171/3200 train_loss:3.6030 train_time:328281ms step_avg:151.91ms step:2172/3200 train_loss:3.3955 train_time:328432ms step_avg:151.91ms step:2173/3200 train_loss:3.3830 train_time:328581ms step_avg:151.91ms step:2174/3200 train_loss:3.3998 train_time:328733ms step_avg:151.91ms step:2175/3200 train_loss:3.4452 train_time:328882ms step_avg:151.91ms step:2176/3200 train_loss:3.4143 train_time:329033ms step_avg:151.91ms step:2177/3200 train_loss:3.3837 train_time:329183ms step_avg:151.91ms step:2178/3200 train_loss:3.5992 train_time:329335ms step_avg:151.91ms step:2179/3200 train_loss:3.4262 train_time:329485ms step_avg:151.91ms step:2180/3200 train_loss:3.4398 train_time:329636ms step_avg:151.91ms step:2181/3200 train_loss:3.4924 train_time:329785ms step_avg:151.90ms step:2182/3200 train_loss:3.4696 train_time:329937ms step_avg:151.90ms step:2183/3200 train_loss:3.4389 train_time:330086ms step_avg:151.90ms step:2184/3200 
train_loss:3.3418 train_time:330236ms step_avg:151.90ms step:2185/3200 train_loss:3.5147 train_time:330385ms step_avg:151.90ms step:2186/3200 train_loss:3.6813 train_time:330537ms step_avg:151.90ms step:2187/3200 train_loss:3.3207 train_time:330688ms step_avg:151.90ms step:2188/3200 train_loss:3.3683 train_time:330837ms step_avg:151.90ms step:2189/3200 train_loss:3.2108 train_time:330987ms step_avg:151.90ms step:2190/3200 train_loss:3.3674 train_time:331138ms step_avg:151.90ms step:2191/3200 train_loss:3.5162 train_time:331289ms step_avg:151.90ms step:2192/3200 train_loss:3.4466 train_time:331439ms step_avg:151.90ms step:2193/3200 train_loss:3.6813 train_time:331590ms step_avg:151.90ms step:2194/3200 train_loss:3.4473 train_time:331740ms step_avg:151.90ms step:2195/3200 train_loss:3.5067 train_time:331890ms step_avg:151.89ms step:2196/3200 train_loss:3.4520 train_time:332041ms step_avg:151.89ms step:2197/3200 train_loss:3.3774 train_time:332193ms step_avg:151.89ms step:2198/3200 train_loss:3.4534 train_time:332342ms step_avg:151.89ms step:2199/3200 train_loss:3.3938 train_time:332494ms step_avg:151.89ms step:2200/3200 train_loss:3.4015 train_time:332643ms step_avg:151.89ms step:2201/3200 train_loss:3.4480 train_time:332795ms step_avg:151.89ms step:2202/3200 train_loss:3.4309 train_time:332945ms step_avg:151.89ms step:2203/3200 train_loss:3.4184 train_time:333096ms step_avg:151.89ms step:2204/3200 train_loss:3.9076 train_time:333246ms step_avg:151.89ms step:2205/3200 train_loss:3.3304 train_time:333397ms step_avg:151.89ms step:2206/3200 train_loss:3.4500 train_time:333548ms step_avg:151.89ms step:2207/3200 train_loss:3.4714 train_time:333700ms step_avg:151.89ms step:2208/3200 train_loss:3.4804 train_time:333851ms step_avg:151.89ms step:2209/3200 train_loss:3.3808 train_time:334000ms step_avg:151.89ms step:2210/3200 train_loss:3.4566 train_time:334152ms step_avg:151.89ms step:2211/3200 train_loss:3.4654 train_time:334301ms step_avg:151.89ms step:2212/3200 
train_loss:3.4624 train_time:334452ms step_avg:151.89ms step:2213/3200 train_loss:3.4862 train_time:334603ms step_avg:151.89ms step:2214/3200 train_loss:3.3434 train_time:334755ms step_avg:151.89ms step:2215/3200 train_loss:3.4079 train_time:334904ms step_avg:151.88ms step:2216/3200 train_loss:3.5458 train_time:335056ms step_avg:151.88ms step:2217/3200 train_loss:3.4912 train_time:335206ms step_avg:151.88ms step:2218/3200 train_loss:3.4571 train_time:335356ms step_avg:151.88ms step:2219/3200 train_loss:3.4647 train_time:335507ms step_avg:151.88ms step:2220/3200 train_loss:3.3723 train_time:335656ms step_avg:151.88ms step:2221/3200 train_loss:3.6331 train_time:335807ms step_avg:151.88ms step:2222/3200 train_loss:3.5175 train_time:335956ms step_avg:151.88ms step:2223/3200 train_loss:3.5407 train_time:336106ms step_avg:151.88ms step:2224/3200 train_loss:3.4223 train_time:336257ms step_avg:151.88ms step:2225/3200 train_loss:3.5546 train_time:336408ms step_avg:151.88ms step:2226/3200 train_loss:3.3028 train_time:336559ms step_avg:151.88ms step:2227/3200 train_loss:3.5758 train_time:336709ms step_avg:151.88ms step:2228/3200 train_loss:3.5115 train_time:336858ms step_avg:151.87ms step:2229/3200 train_loss:3.3172 train_time:337009ms step_avg:151.87ms step:2230/3200 train_loss:3.6559 train_time:337159ms step_avg:151.87ms step:2231/3200 train_loss:3.3507 train_time:337310ms step_avg:151.87ms step:2232/3200 train_loss:3.8248 train_time:337460ms step_avg:151.87ms step:2233/3200 train_loss:3.5067 train_time:337611ms step_avg:151.87ms step:2234/3200 train_loss:3.4481 train_time:337761ms step_avg:151.87ms step:2235/3200 train_loss:3.4704 train_time:337913ms step_avg:151.87ms step:2236/3200 train_loss:3.2649 train_time:338063ms step_avg:151.87ms step:2237/3200 train_loss:3.2652 train_time:338215ms step_avg:151.87ms step:2238/3200 train_loss:3.4907 train_time:338365ms step_avg:151.87ms step:2239/3200 train_loss:3.5889 train_time:338517ms step_avg:151.87ms step:2240/3200 
train_loss:3.3054 train_time:338668ms step_avg:151.87ms step:2241/3200 train_loss:3.3765 train_time:338818ms step_avg:151.87ms step:2242/3200 train_loss:3.5622 train_time:338968ms step_avg:151.87ms step:2243/3200 train_loss:3.5229 train_time:339118ms step_avg:151.87ms step:2244/3200 train_loss:3.3820 train_time:339268ms step_avg:151.87ms step:2245/3200 train_loss:3.4531 train_time:339418ms step_avg:151.86ms step:2246/3200 train_loss:3.4734 train_time:339569ms step_avg:151.86ms step:2247/3200 train_loss:3.3079 train_time:339718ms step_avg:151.86ms step:2248/3200 train_loss:3.3298 train_time:339868ms step_avg:151.86ms step:2249/3200 train_loss:3.5841 train_time:340018ms step_avg:151.86ms step:2250/3200 train_loss:3.3057 train_time:340167ms step_avg:151.86ms step:2250/3200 val_loss:3.4372 train_time:340215ms step_avg:151.88ms step:2251/3200 train_loss:3.3150 train_time:340327ms step_avg:151.86ms step:2252/3200 train_loss:3.3874 train_time:340479ms step_avg:151.86ms step:2253/3200 train_loss:3.3577 train_time:340627ms step_avg:151.86ms step:2254/3200 train_loss:3.4143 train_time:340776ms step_avg:151.86ms step:2255/3200 train_loss:3.4662 train_time:340924ms step_avg:151.86ms step:2256/3200 train_loss:3.3396 train_time:341074ms step_avg:151.86ms step:2257/3200 train_loss:3.6264 train_time:341225ms step_avg:151.86ms step:2258/3200 train_loss:3.5053 train_time:341378ms step_avg:151.86ms step:2259/3200 train_loss:3.8220 train_time:341530ms step_avg:151.86ms step:2260/3200 train_loss:3.5010 train_time:341681ms step_avg:151.86ms step:2261/3200 train_loss:3.5540 train_time:341831ms step_avg:151.86ms step:2262/3200 train_loss:3.4660 train_time:341982ms step_avg:151.86ms step:2263/3200 train_loss:3.4689 train_time:342129ms step_avg:151.85ms step:2264/3200 train_loss:3.2231 train_time:342280ms step_avg:151.85ms step:2265/3200 train_loss:3.3505 train_time:342431ms step_avg:151.85ms step:2266/3200 train_loss:3.5674 train_time:342582ms step_avg:151.85ms step:2267/3200 
train_loss:3.3046 train_time:342732ms step_avg:151.85ms step:2268/3200 train_loss:3.3724 train_time:343038ms step_avg:151.92ms step:2269/3200 train_loss:3.3496 train_time:343196ms step_avg:151.92ms step:2270/3200 train_loss:3.3107 train_time:343345ms step_avg:151.92ms step:2271/3200 train_loss:3.7146 train_time:343494ms step_avg:151.92ms step:2272/3200 train_loss:3.3716 train_time:343644ms step_avg:151.92ms step:2273/3200 train_loss:3.3750 train_time:343793ms step_avg:151.92ms step:2274/3200 train_loss:3.4601 train_time:343944ms step_avg:151.92ms step:2275/3200 train_loss:3.4106 train_time:344099ms step_avg:151.92ms step:2276/3200 train_loss:3.4250 train_time:344250ms step_avg:151.92ms step:2277/3200 train_loss:3.3035 train_time:344401ms step_avg:151.92ms step:2278/3200 train_loss:3.4087 train_time:344549ms step_avg:151.92ms step:2279/3200 train_loss:3.5371 train_time:344699ms step_avg:151.92ms step:2280/3200 train_loss:3.3390 train_time:345015ms step_avg:151.99ms step:2281/3200 train_loss:3.3967 train_time:345162ms step_avg:151.99ms step:2282/3200 train_loss:3.4134 train_time:345310ms step_avg:151.99ms step:2283/3200 train_loss:3.5505 train_time:345459ms step_avg:151.98ms step:2284/3200 train_loss:3.4240 train_time:345608ms step_avg:151.98ms step:2285/3200 train_loss:3.4510 train_time:345756ms step_avg:151.98ms step:2286/3200 train_loss:3.4456 train_time:345908ms step_avg:151.98ms step:2287/3200 train_loss:3.4467 train_time:346063ms step_avg:151.98ms step:2288/3200 train_loss:3.4015 train_time:346212ms step_avg:151.98ms step:2289/3200 train_loss:3.5353 train_time:346363ms step_avg:151.98ms step:2290/3200 train_loss:3.5042 train_time:346511ms step_avg:151.98ms step:2291/3200 train_loss:3.3904 train_time:346661ms step_avg:151.98ms step:2292/3200 train_loss:3.7280 train_time:346810ms step_avg:151.98ms step:2293/3200 train_loss:3.3886 train_time:346963ms step_avg:151.98ms step:2294/3200 train_loss:3.3322 train_time:347113ms step_avg:151.98ms step:2295/3200 
train_loss:3.5221 train_time:347266ms step_avg:151.98ms step:2296/3200 train_loss:3.4628 train_time:347415ms step_avg:151.97ms step:2297/3200 train_loss:3.4414 train_time:347566ms step_avg:151.97ms step:2298/3200 train_loss:3.8161 train_time:347714ms step_avg:151.97ms step:2299/3200 train_loss:3.3304 train_time:347866ms step_avg:151.97ms step:2300/3200 train_loss:3.3397 train_time:348017ms step_avg:151.97ms step:2301/3200 train_loss:3.6695 train_time:348168ms step_avg:151.97ms step:2302/3200 train_loss:3.4022 train_time:348319ms step_avg:151.97ms step:2303/3200 train_loss:3.4193 train_time:348469ms step_avg:151.97ms step:2304/3200 train_loss:3.4060 train_time:348619ms step_avg:151.97ms step:2305/3200 train_loss:3.3353 train_time:348768ms step_avg:151.97ms step:2306/3200 train_loss:3.4994 train_time:348917ms step_avg:151.97ms step:2307/3200 train_loss:3.3640 train_time:349068ms step_avg:151.97ms step:2308/3200 train_loss:3.3770 train_time:349220ms step_avg:151.97ms step:2309/3200 train_loss:3.5178 train_time:349371ms step_avg:151.97ms step:2310/3200 train_loss:3.4705 train_time:349523ms step_avg:151.97ms step:2311/3200 train_loss:3.3338 train_time:349672ms step_avg:151.97ms step:2312/3200 train_loss:3.4533 train_time:349823ms step_avg:151.96ms step:2313/3200 train_loss:3.5763 train_time:349973ms step_avg:151.96ms step:2314/3200 train_loss:3.3995 train_time:350124ms step_avg:151.96ms step:2315/3200 train_loss:3.3222 train_time:350274ms step_avg:151.96ms step:2316/3200 train_loss:3.4136 train_time:350426ms step_avg:151.96ms step:2317/3200 train_loss:3.2958 train_time:350576ms step_avg:151.96ms step:2318/3200 train_loss:3.3933 train_time:350728ms step_avg:151.96ms step:2319/3200 train_loss:3.4174 train_time:350878ms step_avg:151.96ms step:2320/3200 train_loss:3.2722 train_time:351029ms step_avg:151.96ms step:2321/3200 train_loss:3.4036 train_time:351179ms step_avg:151.96ms step:2322/3200 train_loss:3.4525 train_time:351329ms step_avg:151.96ms step:2323/3200 
train_loss:3.3699 train_time:351481ms step_avg:151.96ms step:2324/3200 train_loss:3.4138 train_time:351632ms step_avg:151.96ms step:2325/3200 train_loss:3.3355 train_time:351783ms step_avg:151.96ms step:2326/3200 train_loss:3.4758 train_time:351933ms step_avg:151.96ms step:2327/3200 train_loss:3.4830 train_time:352085ms step_avg:151.96ms step:2328/3200 train_loss:3.2612 train_time:352234ms step_avg:151.96ms step:2329/3200 train_loss:3.3661 train_time:352385ms step_avg:151.96ms step:2330/3200 train_loss:3.3945 train_time:352536ms step_avg:151.96ms step:2331/3200 train_loss:3.3704 train_time:352687ms step_avg:151.95ms step:2332/3200 train_loss:3.5573 train_time:352837ms step_avg:151.95ms step:2333/3200 train_loss:3.4259 train_time:352988ms step_avg:151.95ms step:2334/3200 train_loss:3.4070 train_time:353139ms step_avg:151.95ms step:2335/3200 train_loss:3.4899 train_time:353288ms step_avg:151.95ms step:2336/3200 train_loss:3.3307 train_time:353440ms step_avg:151.95ms step:2337/3200 train_loss:3.4874 train_time:353590ms step_avg:151.95ms step:2338/3200 train_loss:3.4394 train_time:353741ms step_avg:151.95ms step:2339/3200 train_loss:3.3850 train_time:353890ms step_avg:151.95ms step:2340/3200 train_loss:3.4675 train_time:354041ms step_avg:151.95ms step:2341/3200 train_loss:3.5149 train_time:354191ms step_avg:151.95ms step:2342/3200 train_loss:3.3827 train_time:354341ms step_avg:151.95ms step:2343/3200 train_loss:3.3937 train_time:354490ms step_avg:151.95ms step:2344/3200 train_loss:3.4594 train_time:354641ms step_avg:151.95ms step:2345/3200 train_loss:3.3970 train_time:354790ms step_avg:151.94ms step:2346/3200 train_loss:3.5165 train_time:354942ms step_avg:151.94ms step:2347/3200 train_loss:3.4279 train_time:355093ms step_avg:151.94ms step:2348/3200 train_loss:3.5327 train_time:355245ms step_avg:151.94ms step:2349/3200 train_loss:3.4905 train_time:355393ms step_avg:151.94ms step:2350/3200 train_loss:3.5341 train_time:355547ms step_avg:151.94ms step:2351/3200 
train_loss:3.2292 train_time:355698ms step_avg:151.94ms step:2352/3200 train_loss:3.3482 train_time:355847ms step_avg:151.94ms step:2353/3200 train_loss:3.3432 train_time:355998ms step_avg:151.94ms step:2354/3200 train_loss:3.5647 train_time:356149ms step_avg:151.94ms step:2355/3200 train_loss:3.3546 train_time:356300ms step_avg:151.94ms step:2356/3200 train_loss:3.3509 train_time:356450ms step_avg:151.94ms step:2357/3200 train_loss:3.4983 train_time:356602ms step_avg:151.94ms step:2358/3200 train_loss:3.3569 train_time:356751ms step_avg:151.94ms step:2359/3200 train_loss:3.4552 train_time:356903ms step_avg:151.94ms step:2360/3200 train_loss:3.3562 train_time:357052ms step_avg:151.94ms step:2361/3200 train_loss:3.3703 train_time:357203ms step_avg:151.94ms step:2362/3200 train_loss:3.3998 train_time:357351ms step_avg:151.93ms step:2363/3200 train_loss:3.4621 train_time:357503ms step_avg:151.93ms step:2364/3200 train_loss:3.4149 train_time:357652ms step_avg:151.93ms step:2365/3200 train_loss:3.8422 train_time:357804ms step_avg:151.93ms step:2366/3200 train_loss:3.4735 train_time:357953ms step_avg:151.93ms step:2367/3200 train_loss:3.6125 train_time:358106ms step_avg:151.93ms step:2368/3200 train_loss:3.4363 train_time:358256ms step_avg:151.93ms step:2369/3200 train_loss:3.4401 train_time:358408ms step_avg:151.93ms step:2370/3200 train_loss:3.4762 train_time:358558ms step_avg:151.93ms step:2371/3200 train_loss:3.3526 train_time:358709ms step_avg:151.93ms step:2372/3200 train_loss:3.5870 train_time:358859ms step_avg:151.93ms step:2373/3200 train_loss:3.4298 train_time:359009ms step_avg:151.93ms step:2374/3200 train_loss:3.9860 train_time:359160ms step_avg:151.93ms step:2375/3200 train_loss:3.4166 train_time:359309ms step_avg:151.93ms step:2375/3200 val_loss:3.4211 train_time:359356ms step_avg:151.95ms step:2376/3200 train_loss:3.3157 train_time:359466ms step_avg:151.93ms step:2377/3200 train_loss:3.4805 train_time:359619ms step_avg:151.93ms step:2378/3200 
train_loss:3.4529 train_time:359768ms step_avg:151.93ms step:2379/3200 train_loss:3.4686 train_time:359917ms step_avg:151.93ms step:2380/3200 train_loss:3.4434 train_time:360064ms step_avg:151.93ms step:2381/3200 train_loss:3.3488 train_time:360214ms step_avg:151.92ms step:2382/3200 train_loss:3.4473 train_time:360363ms step_avg:151.92ms step:2383/3200 train_loss:3.4585 train_time:360516ms step_avg:151.92ms step:2384/3200 train_loss:3.4106 train_time:360667ms step_avg:151.92ms step:2385/3200 train_loss:3.3416 train_time:360819ms step_avg:151.92ms step:2386/3200 train_loss:3.4531 train_time:360967ms step_avg:151.92ms step:2387/3200 train_loss:3.4029 train_time:361118ms step_avg:151.92ms step:2388/3200 train_loss:3.4097 train_time:361267ms step_avg:151.92ms step:2389/3200 train_loss:3.4404 train_time:361419ms step_avg:151.92ms step:2390/3200 train_loss:3.4251 train_time:361568ms step_avg:151.92ms step:2391/3200 train_loss:3.4251 train_time:361720ms step_avg:151.92ms step:2392/3200 train_loss:3.3092 train_time:361870ms step_avg:151.92ms step:2393/3200 train_loss:3.5287 train_time:362021ms step_avg:151.92ms step:2394/3200 train_loss:3.3565 train_time:362170ms step_avg:151.92ms step:2395/3200 train_loss:3.4642 train_time:362323ms step_avg:151.92ms step:2396/3200 train_loss:3.5728 train_time:362471ms step_avg:151.92ms step:2397/3200 train_loss:3.5787 train_time:362622ms step_avg:151.92ms step:2398/3200 train_loss:3.5495 train_time:362773ms step_avg:151.92ms step:2399/3200 train_loss:3.5025 train_time:362924ms step_avg:151.91ms step:2400/3200 train_loss:3.3773 train_time:363073ms step_avg:151.91ms step:2401/3200 train_loss:3.3831 train_time:363222ms step_avg:151.91ms step:2402/3200 train_loss:3.4884 train_time:363372ms step_avg:151.91ms step:2403/3200 train_loss:3.3238 train_time:363523ms step_avg:151.91ms step:2404/3200 train_loss:3.4538 train_time:363675ms step_avg:151.91ms step:2405/3200 train_loss:3.6703 train_time:363825ms step_avg:151.91ms step:2406/3200 
train_loss:3.3910 train_time:363976ms step_avg:151.91ms step:2407/3200 train_loss:3.5444 train_time:364126ms step_avg:151.91ms step:2408/3200 train_loss:3.4038 train_time:364277ms step_avg:151.91ms step:2409/3200 train_loss:3.3344 train_time:364426ms step_avg:151.91ms step:2410/3200 train_loss:3.4715 train_time:364578ms step_avg:151.91ms step:2411/3200 train_loss:3.2631 train_time:364729ms step_avg:151.91ms step:2412/3200 train_loss:3.6969 train_time:364881ms step_avg:151.91ms step:2413/3200 train_loss:3.3766 train_time:365031ms step_avg:151.91ms step:2414/3200 train_loss:3.4599 train_time:365182ms step_avg:151.91ms step:2415/3200 train_loss:3.3737 train_time:365334ms step_avg:151.91ms step:2416/3200 train_loss:3.4478 train_time:365484ms step_avg:151.91ms step:2417/3200 train_loss:3.2670 train_time:365635ms step_avg:151.91ms step:2418/3200 train_loss:3.1899 train_time:365785ms step_avg:151.90ms step:2419/3200 train_loss:3.4935 train_time:365937ms step_avg:151.90ms step:2420/3200 train_loss:3.3714 train_time:366086ms step_avg:151.90ms step:2421/3200 train_loss:3.3942 train_time:366237ms step_avg:151.90ms step:2422/3200 train_loss:3.5048 train_time:366387ms step_avg:151.90ms step:2423/3200 train_loss:3.5400 train_time:366539ms step_avg:151.90ms step:2424/3200 train_loss:3.3668 train_time:366688ms step_avg:151.90ms step:2425/3200 train_loss:3.4651 train_time:366841ms step_avg:151.90ms step:2426/3200 train_loss:3.4572 train_time:366990ms step_avg:151.90ms step:2427/3200 train_loss:3.3830 train_time:367142ms step_avg:151.90ms step:2428/3200 train_loss:3.3289 train_time:367292ms step_avg:151.90ms step:2429/3200 train_loss:3.4581 train_time:367443ms step_avg:151.90ms step:2430/3200 train_loss:3.3535 train_time:367593ms step_avg:151.90ms step:2431/3200 train_loss:3.4168 train_time:367744ms step_avg:151.90ms step:2432/3200 train_loss:3.4700 train_time:367894ms step_avg:151.90ms step:2433/3200 train_loss:3.4347 train_time:368044ms step_avg:151.90ms step:2434/3200 
train_loss:3.3069 train_time:368194ms step_avg:151.90ms step:2435/3200 train_loss:3.2729 train_time:368345ms step_avg:151.89ms step:2436/3200 train_loss:3.4403 train_time:368496ms step_avg:151.89ms step:2437/3200 train_loss:3.2927 train_time:368645ms step_avg:151.89ms step:2438/3200 train_loss:3.3706 train_time:368797ms step_avg:151.89ms step:2439/3200 train_loss:3.4661 train_time:368946ms step_avg:151.89ms step:2440/3200 train_loss:3.3859 train_time:369097ms step_avg:151.89ms step:2441/3200 train_loss:3.4662 train_time:369247ms step_avg:151.89ms step:2442/3200 train_loss:3.3576 train_time:369399ms step_avg:151.89ms step:2443/3200 train_loss:3.4074 train_time:369548ms step_avg:151.89ms step:2444/3200 train_loss:3.2959 train_time:369700ms step_avg:151.89ms step:2445/3200 train_loss:3.3061 train_time:369850ms step_avg:151.89ms step:2446/3200 train_loss:3.4689 train_time:370001ms step_avg:151.89ms step:2447/3200 train_loss:3.3362 train_time:370151ms step_avg:151.89ms step:2448/3200 train_loss:3.4029 train_time:370302ms step_avg:151.89ms step:2449/3200 train_loss:3.5689 train_time:370452ms step_avg:151.89ms step:2450/3200 train_loss:3.3977 train_time:370602ms step_avg:151.89ms step:2451/3200 train_loss:3.4706 train_time:370751ms step_avg:151.88ms step:2452/3200 train_loss:3.3765 train_time:370903ms step_avg:151.88ms step:2453/3200 train_loss:3.4753 train_time:371051ms step_avg:151.88ms step:2454/3200 train_loss:3.3695 train_time:371203ms step_avg:151.88ms step:2455/3200 train_loss:3.4965 train_time:371353ms step_avg:151.88ms step:2456/3200 train_loss:3.4284 train_time:371504ms step_avg:151.88ms step:2457/3200 train_loss:3.3544 train_time:371796ms step_avg:151.94ms step:2458/3200 train_loss:3.2742 train_time:371951ms step_avg:151.94ms step:2459/3200 train_loss:3.4061 train_time:372101ms step_avg:151.94ms step:2460/3200 train_loss:4.0053 train_time:372249ms step_avg:151.94ms step:2461/3200 train_loss:3.4648 train_time:372399ms step_avg:151.94ms step:2462/3200 
train_loss:3.2858 train_time:372547ms step_avg:151.94ms step:2463/3200 train_loss:3.4795 train_time:372699ms step_avg:151.94ms step:2464/3200 train_loss:3.3916 train_time:372853ms step_avg:151.94ms step:2465/3200 train_loss:3.5982 train_time:373004ms step_avg:151.94ms step:2466/3200 train_loss:3.7687 train_time:373154ms step_avg:151.94ms step:2467/3200 train_loss:3.5110 train_time:373303ms step_avg:151.93ms step:2468/3200 train_loss:3.3814 train_time:373453ms step_avg:151.93ms step:2469/3200 train_loss:3.4991 train_time:373602ms step_avg:151.93ms step:2470/3200 train_loss:3.5121 train_time:373915ms step_avg:152.00ms step:2471/3200 train_loss:3.3134 train_time:374061ms step_avg:152.00ms step:2472/3200 train_loss:3.4040 train_time:374211ms step_avg:151.99ms step:2473/3200 train_loss:3.4066 train_time:374360ms step_avg:151.99ms step:2474/3200 train_loss:3.5391 train_time:374509ms step_avg:151.99ms step:2475/3200 train_loss:3.6789 train_time:374658ms step_avg:151.99ms step:2476/3200 train_loss:3.2608 train_time:374811ms step_avg:151.99ms step:2477/3200 train_loss:3.4744 train_time:374967ms step_avg:151.99ms step:2478/3200 train_loss:3.4360 train_time:375118ms step_avg:151.99ms step:2479/3200 train_loss:3.2763 train_time:375266ms step_avg:151.99ms step:2480/3200 train_loss:3.2729 train_time:375415ms step_avg:151.99ms step:2481/3200 train_loss:3.4151 train_time:375564ms step_avg:151.99ms step:2482/3200 train_loss:3.4321 train_time:375714ms step_avg:151.99ms step:2483/3200 train_loss:3.4445 train_time:375864ms step_avg:151.99ms step:2484/3200 train_loss:3.4046 train_time:376018ms step_avg:151.99ms step:2485/3200 train_loss:3.4116 train_time:376168ms step_avg:151.99ms step:2486/3200 train_loss:3.2987 train_time:376319ms step_avg:151.99ms step:2487/3200 train_loss:3.4975 train_time:376467ms step_avg:151.98ms step:2488/3200 train_loss:3.4511 train_time:376617ms step_avg:151.98ms step:2489/3200 train_loss:3.3540 train_time:376765ms step_avg:151.98ms step:2490/3200 
train_loss:3.4668 train_time:376919ms step_avg:151.98ms step:2491/3200 train_loss:3.5115 train_time:377069ms step_avg:151.98ms step:2492/3200 train_loss:3.5951 train_time:377221ms step_avg:151.98ms step:2493/3200 train_loss:3.4462 train_time:377372ms step_avg:151.98ms step:2494/3200 train_loss:3.3657 train_time:377521ms step_avg:151.98ms step:2495/3200 train_loss:3.4897 train_time:377670ms step_avg:151.98ms step:2496/3200 train_loss:3.4435 train_time:377822ms step_avg:151.98ms step:2497/3200 train_loss:3.3540 train_time:377972ms step_avg:151.98ms step:2498/3200 train_loss:3.4510 train_time:378122ms step_avg:151.98ms step:2499/3200 train_loss:3.5049 train_time:378273ms step_avg:151.98ms step:2500/3200 train_loss:3.5225 train_time:378424ms step_avg:151.98ms step:2500/3200 val_loss:3.3974 train_time:378471ms step_avg:152.00ms step:2501/3200 train_loss:3.4675 train_time:378582ms step_avg:151.98ms step:2502/3200 train_loss:3.4227 train_time:378733ms step_avg:151.98ms step:2503/3200 train_loss:3.4415 train_time:378884ms step_avg:151.98ms step:2504/3200 train_loss:3.3028 train_time:379031ms step_avg:151.98ms step:2505/3200 train_loss:3.5030 train_time:379179ms step_avg:151.98ms step:2506/3200 train_loss:3.4527 train_time:379328ms step_avg:151.97ms step:2507/3200 train_loss:3.3989 train_time:379478ms step_avg:151.97ms step:2508/3200 train_loss:3.3990 train_time:379634ms step_avg:151.98ms step:2509/3200 train_loss:3.3586 train_time:379787ms step_avg:151.98ms step:2510/3200 train_loss:3.5380 train_time:379935ms step_avg:151.97ms step:2511/3200 train_loss:3.3682 train_time:380085ms step_avg:151.97ms step:2512/3200 train_loss:3.3503 train_time:380233ms step_avg:151.97ms step:2513/3200 train_loss:3.4314 train_time:380385ms step_avg:151.97ms step:2514/3200 train_loss:3.4538 train_time:380536ms step_avg:151.97ms step:2515/3200 train_loss:3.3583 train_time:380690ms step_avg:151.97ms step:2516/3200 train_loss:3.4425 train_time:380841ms step_avg:151.97ms step:2517/3200 
train_loss:3.4332 train_time:380990ms step_avg:151.97ms step:2518/3200 train_loss:3.3147 train_time:381140ms step_avg:151.97ms step:2519/3200 train_loss:3.3458 train_time:381289ms step_avg:151.97ms step:2520/3200 train_loss:3.4631 train_time:381440ms step_avg:151.97ms step:2521/3200 train_loss:3.4530 train_time:381590ms step_avg:151.97ms step:2522/3200 train_loss:3.3429 train_time:381744ms step_avg:151.97ms step:2523/3200 train_loss:3.3174 train_time:381894ms step_avg:151.97ms step:2524/3200 train_loss:3.4188 train_time:382045ms step_avg:151.97ms step:2525/3200 train_loss:3.2687 train_time:382194ms step_avg:151.97ms step:2526/3200 train_loss:3.4813 train_time:382346ms step_avg:151.97ms step:2527/3200 train_loss:3.3847 train_time:382495ms step_avg:151.96ms step:2528/3200 train_loss:3.3962 train_time:382647ms step_avg:151.96ms step:2529/3200 train_loss:3.3894 train_time:382795ms step_avg:151.96ms step:2530/3200 train_loss:3.3955 train_time:382948ms step_avg:151.96ms step:2531/3200 train_loss:3.4377 train_time:383099ms step_avg:151.96ms step:2532/3200 train_loss:3.2654 train_time:383249ms step_avg:151.96ms step:2533/3200 train_loss:3.4225 train_time:383399ms step_avg:151.96ms step:2534/3200 train_loss:3.3140 train_time:383549ms step_avg:151.96ms step:2535/3200 train_loss:3.3512 train_time:383698ms step_avg:151.96ms step:2536/3200 train_loss:3.4107 train_time:383847ms step_avg:151.96ms step:2537/3200 train_loss:3.4172 train_time:383998ms step_avg:151.96ms step:2538/3200 train_loss:3.2429 train_time:384149ms step_avg:151.96ms step:2539/3200 train_loss:3.5526 train_time:384299ms step_avg:151.96ms step:2540/3200 train_loss:3.2377 train_time:384449ms step_avg:151.96ms step:2541/3200 train_loss:3.4172 train_time:384600ms step_avg:151.96ms step:2542/3200 train_loss:3.1866 train_time:384749ms step_avg:151.95ms step:2543/3200 train_loss:3.6254 train_time:384898ms step_avg:151.95ms step:2544/3200 train_loss:3.3912 train_time:385050ms step_avg:151.95ms step:2545/3200 
train_loss:3.5480 train_time:385201ms step_avg:151.95ms step:2546/3200 train_loss:3.3848 train_time:385351ms step_avg:151.95ms step:2547/3200 train_loss:3.3585 train_time:385503ms step_avg:151.95ms step:2548/3200 train_loss:3.3648 train_time:385655ms step_avg:151.95ms step:2549/3200 train_loss:3.5277 train_time:385807ms step_avg:151.95ms step:2550/3200 train_loss:3.3812 train_time:385956ms step_avg:151.95ms step:2551/3200 train_loss:3.3870 train_time:386109ms step_avg:151.95ms step:2552/3200 train_loss:3.4113 train_time:386261ms step_avg:151.95ms step:2553/3200 train_loss:3.4333 train_time:386410ms step_avg:151.95ms step:2554/3200 train_loss:3.3425 train_time:386561ms step_avg:151.95ms step:2555/3200 train_loss:3.4537 train_time:386710ms step_avg:151.95ms step:2556/3200 train_loss:3.5016 train_time:386861ms step_avg:151.95ms step:2557/3200 train_loss:3.4963 train_time:387011ms step_avg:151.95ms step:2558/3200 train_loss:3.3358 train_time:387162ms step_avg:151.95ms step:2559/3200 train_loss:3.3356 train_time:387313ms step_avg:151.95ms step:2560/3200 train_loss:3.3447 train_time:387465ms step_avg:151.95ms step:2561/3200 train_loss:3.4698 train_time:387614ms step_avg:151.95ms step:2562/3200 train_loss:3.5000 train_time:387765ms step_avg:151.95ms step:2563/3200 train_loss:3.3805 train_time:387913ms step_avg:151.94ms step:2564/3200 train_loss:3.4154 train_time:388065ms step_avg:151.94ms step:2565/3200 train_loss:3.3356 train_time:388214ms step_avg:151.94ms step:2566/3200 train_loss:3.3530 train_time:388367ms step_avg:151.94ms step:2567/3200 train_loss:3.3409 train_time:388517ms step_avg:151.94ms step:2568/3200 train_loss:3.3886 train_time:388668ms step_avg:151.94ms step:2569/3200 train_loss:3.5291 train_time:388817ms step_avg:151.94ms step:2570/3200 train_loss:3.4360 train_time:388968ms step_avg:151.94ms step:2571/3200 train_loss:3.5208 train_time:389119ms step_avg:151.94ms step:2572/3200 train_loss:3.2796 train_time:389269ms step_avg:151.94ms step:2573/3200 
train_loss:3.3783 train_time:389419ms step_avg:151.94ms step:2574/3200 train_loss:3.0335 train_time:389570ms step_avg:151.94ms step:2575/3200 train_loss:3.2844 train_time:389721ms step_avg:151.94ms step:2576/3200 train_loss:3.2275 train_time:389871ms step_avg:151.94ms step:2577/3200 train_loss:3.3416 train_time:390021ms step_avg:151.94ms step:2578/3200 train_loss:3.3884 train_time:390171ms step_avg:151.94ms step:2579/3200 train_loss:3.3030 train_time:390321ms step_avg:151.94ms step:2580/3200 train_loss:3.3564 train_time:390471ms step_avg:151.93ms step:2581/3200 train_loss:3.3034 train_time:390623ms step_avg:151.93ms step:2582/3200 train_loss:3.4048 train_time:390772ms step_avg:151.93ms step:2583/3200 train_loss:3.2901 train_time:390924ms step_avg:151.93ms step:2584/3200 train_loss:3.4848 train_time:391073ms step_avg:151.93ms step:2585/3200 train_loss:3.3899 train_time:391224ms step_avg:151.93ms step:2586/3200 train_loss:3.4096 train_time:391373ms step_avg:151.93ms step:2587/3200 train_loss:3.5314 train_time:391525ms step_avg:151.93ms step:2588/3200 train_loss:3.4170 train_time:391673ms step_avg:151.93ms step:2589/3200 train_loss:3.2751 train_time:391825ms step_avg:151.93ms step:2590/3200 train_loss:3.4426 train_time:391974ms step_avg:151.93ms step:2591/3200 train_loss:3.3470 train_time:392126ms step_avg:151.93ms step:2592/3200 train_loss:3.5616 train_time:392275ms step_avg:151.93ms step:2593/3200 train_loss:3.4292 train_time:392428ms step_avg:151.93ms step:2594/3200 train_loss:3.2408 train_time:392577ms step_avg:151.93ms step:2595/3200 train_loss:3.3196 train_time:392729ms step_avg:151.93ms step:2596/3200 train_loss:3.7435 train_time:392878ms step_avg:151.92ms step:2597/3200 train_loss:3.4098 train_time:393029ms step_avg:151.92ms step:2598/3200 train_loss:3.4008 train_time:393180ms step_avg:151.92ms step:2599/3200 train_loss:3.2518 train_time:393330ms step_avg:151.92ms step:2600/3200 train_loss:3.4953 train_time:393481ms step_avg:151.92ms step:2601/3200 
train_loss:3.6612 train_time:393631ms step_avg:151.92ms step:2602/3200 train_loss:3.2453 train_time:393782ms step_avg:151.92ms step:2603/3200 train_loss:3.3781 train_time:393931ms step_avg:151.92ms step:2604/3200 train_loss:3.2274 train_time:394082ms step_avg:151.92ms step:2605/3200 train_loss:3.5193 train_time:394232ms step_avg:151.92ms step:2606/3200 train_loss:3.3751 train_time:394382ms step_avg:151.92ms step:2607/3200 train_loss:3.2745 train_time:394531ms step_avg:151.92ms step:2608/3200 train_loss:3.2365 train_time:394681ms step_avg:151.92ms step:2609/3200 train_loss:3.3483 train_time:394832ms step_avg:151.92ms step:2610/3200 train_loss:3.5315 train_time:394983ms step_avg:151.92ms step:2611/3200 train_loss:3.3970 train_time:395132ms step_avg:151.92ms step:2612/3200 train_loss:3.2398 train_time:395284ms step_avg:151.92ms step:2613/3200 train_loss:3.3196 train_time:395434ms step_avg:151.91ms step:2614/3200 train_loss:3.4344 train_time:395585ms step_avg:151.91ms step:2615/3200 train_loss:3.3727 train_time:395736ms step_avg:151.91ms step:2616/3200 train_loss:3.3695 train_time:395888ms step_avg:151.91ms step:2617/3200 train_loss:3.4019 train_time:396036ms step_avg:151.91ms step:2618/3200 train_loss:3.4450 train_time:396187ms step_avg:151.91ms step:2619/3200 train_loss:3.2945 train_time:396339ms step_avg:151.91ms step:2620/3200 train_loss:3.4660 train_time:396491ms step_avg:151.91ms step:2621/3200 train_loss:3.4323 train_time:396640ms step_avg:151.91ms step:2622/3200 train_loss:3.5534 train_time:396790ms step_avg:151.91ms step:2623/3200 train_loss:3.4667 train_time:396940ms step_avg:151.91ms step:2624/3200 train_loss:3.3868 train_time:397090ms step_avg:151.91ms step:2625/3200 train_loss:3.3389 train_time:397241ms step_avg:151.91ms step:2625/3200 val_loss:3.3747 train_time:397288ms step_avg:151.93ms step:2626/3200 train_loss:3.3681 train_time:397400ms step_avg:151.91ms step:2627/3200 train_loss:3.4300 train_time:397553ms step_avg:151.91ms step:2628/3200 
train_loss:3.2476 train_time:397702ms step_avg:151.91ms step:2629/3200 train_loss:3.5205 train_time:397851ms step_avg:151.91ms step:2630/3200 train_loss:3.3963 train_time:398000ms step_avg:151.91ms step:2631/3200 train_loss:3.4481 train_time:398148ms step_avg:151.91ms step:2632/3200 train_loss:3.6774 train_time:398299ms step_avg:151.91ms step:2633/3200 train_loss:3.4187 train_time:398450ms step_avg:151.91ms step:2634/3200 train_loss:3.3420 train_time:398602ms step_avg:151.91ms step:2635/3200 train_loss:3.3124 train_time:398753ms step_avg:151.91ms step:2636/3200 train_loss:3.3563 train_time:398902ms step_avg:151.90ms step:2637/3200 train_loss:3.1414 train_time:399051ms step_avg:151.90ms step:2638/3200 train_loss:3.4537 train_time:399200ms step_avg:151.90ms step:2639/3200 train_loss:3.4275 train_time:399350ms step_avg:151.90ms step:2640/3200 train_loss:3.3160 train_time:399502ms step_avg:151.90ms step:2641/3200 train_loss:3.4020 train_time:399654ms step_avg:151.90ms step:2642/3200 train_loss:3.4316 train_time:399803ms step_avg:151.90ms step:2643/3200 train_loss:3.2252 train_time:399954ms step_avg:151.90ms step:2644/3200 train_loss:3.3497 train_time:400103ms step_avg:151.90ms step:2645/3200 train_loss:3.4214 train_time:400254ms step_avg:151.90ms step:2646/3200 train_loss:3.3831 train_time:400557ms step_avg:151.96ms step:2647/3200 train_loss:3.2725 train_time:400717ms step_avg:151.96ms step:2648/3200 train_loss:3.4905 train_time:400864ms step_avg:151.96ms step:2649/3200 train_loss:3.7506 train_time:401013ms step_avg:151.96ms step:2650/3200 train_loss:3.3954 train_time:401162ms step_avg:151.96ms step:2651/3200 train_loss:3.3547 train_time:401311ms step_avg:151.95ms step:2652/3200 train_loss:3.4870 train_time:401462ms step_avg:151.95ms step:2653/3200 train_loss:3.3209 train_time:401615ms step_avg:151.95ms step:2654/3200 train_loss:3.3096 train_time:401766ms step_avg:151.95ms step:2655/3200 train_loss:3.3844 train_time:401917ms step_avg:151.95ms step:2656/3200 
train_loss:3.3049 train_time:402066ms step_avg:151.95ms step:2657/3200 train_loss:3.3353 train_time:402216ms step_avg:151.95ms step:2658/3200 train_loss:3.3056 train_time:402365ms step_avg:151.95ms step:2659/3200 train_loss:3.3907 train_time:402516ms step_avg:151.95ms step:2660/3200 train_loss:3.5280 train_time:402837ms step_avg:152.01ms step:2661/3200 train_loss:3.3282 train_time:402984ms step_avg:152.01ms step:2662/3200 train_loss:3.4781 train_time:403135ms step_avg:152.01ms step:2663/3200 train_loss:3.3435 train_time:403282ms step_avg:152.01ms step:2664/3200 train_loss:3.3375 train_time:403431ms step_avg:152.01ms step:2665/3200 train_loss:3.2676 train_time:403581ms step_avg:152.01ms step:2666/3200 train_loss:3.3103 train_time:403732ms step_avg:152.01ms step:2667/3200 train_loss:3.3584 train_time:403888ms step_avg:152.01ms step:2668/3200 train_loss:3.3953 train_time:404040ms step_avg:152.01ms step:2669/3200 train_loss:3.3113 train_time:404190ms step_avg:152.01ms step:2670/3200 train_loss:3.3727 train_time:404339ms step_avg:152.01ms step:2671/3200 train_loss:3.2619 train_time:404487ms step_avg:152.01ms step:2672/3200 train_loss:3.3316 train_time:404637ms step_avg:152.01ms step:2673/3200 train_loss:3.3159 train_time:404788ms step_avg:152.00ms step:2674/3200 train_loss:3.3751 train_time:404941ms step_avg:152.00ms step:2675/3200 train_loss:3.4005 train_time:405091ms step_avg:152.00ms step:2676/3200 train_loss:3.3654 train_time:405242ms step_avg:152.00ms step:2677/3200 train_loss:3.3611 train_time:405391ms step_avg:152.00ms step:2678/3200 train_loss:3.3894 train_time:405540ms step_avg:152.00ms step:2679/3200 train_loss:3.4380 train_time:405690ms step_avg:152.00ms step:2680/3200 train_loss:3.3392 train_time:405841ms step_avg:152.00ms step:2681/3200 train_loss:3.2644 train_time:405992ms step_avg:152.00ms step:2682/3200 train_loss:3.3080 train_time:406143ms step_avg:152.00ms step:2683/3200 train_loss:3.7825 train_time:406294ms step_avg:152.00ms step:2684/3200 
train_loss:3.3696 train_time:406442ms step_avg:152.00ms step:2685/3200 train_loss:3.4011 train_time:406593ms step_avg:152.00ms step:2686/3200 train_loss:3.4424 train_time:406743ms step_avg:152.00ms step:2687/3200 train_loss:3.3711 train_time:406894ms step_avg:152.00ms step:2688/3200 train_loss:3.4470 train_time:407044ms step_avg:152.00ms step:2689/3200 train_loss:3.3743 train_time:407196ms step_avg:152.00ms step:2690/3200 train_loss:3.3592 train_time:407344ms step_avg:151.99ms step:2691/3200 train_loss:3.3918 train_time:407496ms step_avg:151.99ms step:2692/3200 train_loss:3.4612 train_time:407644ms step_avg:151.99ms step:2693/3200 train_loss:3.2554 train_time:407796ms step_avg:151.99ms step:2694/3200 train_loss:3.6424 train_time:407944ms step_avg:151.99ms step:2695/3200 train_loss:3.4397 train_time:408096ms step_avg:151.99ms step:2696/3200 train_loss:3.2283 train_time:408245ms step_avg:151.99ms step:2697/3200 train_loss:3.4232 train_time:408397ms step_avg:151.99ms step:2698/3200 train_loss:3.3873 train_time:408546ms step_avg:151.99ms step:2699/3200 train_loss:3.3397 train_time:408699ms step_avg:151.99ms step:2700/3200 train_loss:3.4421 train_time:408848ms step_avg:151.99ms step:2701/3200 train_loss:3.4165 train_time:408999ms step_avg:151.99ms step:2702/3200 train_loss:3.3169 train_time:409149ms step_avg:151.99ms step:2703/3200 train_loss:3.3387 train_time:409300ms step_avg:151.99ms step:2704/3200 train_loss:3.3507 train_time:409450ms step_avg:151.99ms step:2705/3200 train_loss:3.3201 train_time:409601ms step_avg:151.99ms step:2706/3200 train_loss:3.4923 train_time:409750ms step_avg:151.98ms step:2707/3200 train_loss:3.4542 train_time:409900ms step_avg:151.98ms step:2708/3200 train_loss:3.3590 train_time:410050ms step_avg:151.98ms step:2709/3200 train_loss:3.3597 train_time:410201ms step_avg:151.98ms step:2710/3200 train_loss:3.4600 train_time:410350ms step_avg:151.98ms step:2711/3200 train_loss:3.3368 train_time:410501ms step_avg:151.98ms step:2712/3200 
train_loss:3.4528 train_time:410651ms step_avg:151.98ms step:2713/3200 train_loss:3.1909 train_time:410802ms step_avg:151.98ms step:2714/3200 train_loss:3.3881 train_time:410952ms step_avg:151.98ms step:2715/3200 train_loss:3.2646 train_time:411102ms step_avg:151.98ms step:2716/3200 train_loss:3.2869 train_time:411253ms step_avg:151.98ms step:2717/3200 train_loss:3.4764 train_time:411404ms step_avg:151.98ms step:2718/3200 train_loss:3.3754 train_time:411554ms step_avg:151.98ms step:2719/3200 train_loss:3.6007 train_time:411705ms step_avg:151.98ms step:2720/3200 train_loss:3.3342 train_time:411856ms step_avg:151.98ms step:2721/3200 train_loss:3.3472 train_time:412006ms step_avg:151.98ms step:2722/3200 train_loss:3.5725 train_time:412159ms step_avg:151.98ms step:2723/3200 train_loss:3.3401 train_time:412309ms step_avg:151.98ms step:2724/3200 train_loss:3.5169 train_time:412460ms step_avg:151.98ms step:2725/3200 train_loss:3.3966 train_time:412609ms step_avg:151.97ms step:2726/3200 train_loss:3.3565 train_time:412761ms step_avg:151.97ms step:2727/3200 train_loss:3.3618 train_time:412909ms step_avg:151.97ms step:2728/3200 train_loss:3.6947 train_time:413060ms step_avg:151.97ms step:2729/3200 train_loss:3.4301 train_time:413210ms step_avg:151.97ms step:2730/3200 train_loss:3.2897 train_time:413361ms step_avg:151.97ms step:2731/3200 train_loss:3.4040 train_time:413510ms step_avg:151.97ms step:2732/3200 train_loss:3.3138 train_time:413660ms step_avg:151.97ms step:2733/3200 train_loss:3.1950 train_time:413810ms step_avg:151.97ms step:2734/3200 train_loss:3.3073 train_time:413961ms step_avg:151.97ms step:2735/3200 train_loss:3.3842 train_time:414111ms step_avg:151.97ms step:2736/3200 train_loss:3.2766 train_time:414262ms step_avg:151.97ms step:2737/3200 train_loss:3.6779 train_time:414413ms step_avg:151.97ms step:2738/3200 train_loss:3.4178 train_time:414563ms step_avg:151.97ms step:2739/3200 train_loss:3.6152 train_time:414713ms step_avg:151.97ms step:2740/3200 
train_loss:3.3681 train_time:414863ms step_avg:151.96ms step:2741/3200 train_loss:3.3661 train_time:415014ms step_avg:151.96ms step:2742/3200 train_loss:3.3011 train_time:415164ms step_avg:151.96ms step:2743/3200 train_loss:3.3784 train_time:415314ms step_avg:151.96ms step:2744/3200 train_loss:3.3875 train_time:415464ms step_avg:151.96ms step:2745/3200 train_loss:3.4848 train_time:415616ms step_avg:151.96ms step:2746/3200 train_loss:3.2583 train_time:415766ms step_avg:151.96ms step:2747/3200 train_loss:3.3403 train_time:415918ms step_avg:151.96ms step:2748/3200 train_loss:3.3869 train_time:416068ms step_avg:151.96ms step:2749/3200 train_loss:3.4944 train_time:416219ms step_avg:151.96ms step:2750/3200 train_loss:3.3367 train_time:416370ms step_avg:151.96ms step:2750/3200 val_loss:3.3536 train_time:416417ms step_avg:151.98ms step:2751/3200 train_loss:3.4205 train_time:416530ms step_avg:151.96ms step:2752/3200 train_loss:3.4677 train_time:416681ms step_avg:151.96ms step:2753/3200 train_loss:3.3763 train_time:416829ms step_avg:151.96ms step:2754/3200 train_loss:3.3106 train_time:416977ms step_avg:151.96ms step:2755/3200 train_loss:3.3098 train_time:417126ms step_avg:151.96ms step:2756/3200 train_loss:3.3857 train_time:417274ms step_avg:151.96ms step:2757/3200 train_loss:3.3227 train_time:417426ms step_avg:151.96ms step:2758/3200 train_loss:3.2037 train_time:417578ms step_avg:151.96ms step:2759/3200 train_loss:3.5959 train_time:417729ms step_avg:151.96ms step:2760/3200 train_loss:3.4091 train_time:417880ms step_avg:151.96ms step:2761/3200 train_loss:3.3700 train_time:418029ms step_avg:151.96ms step:2762/3200 train_loss:3.3423 train_time:418178ms step_avg:151.95ms step:2763/3200 train_loss:3.2460 train_time:418328ms step_avg:151.95ms step:2764/3200 train_loss:3.4176 train_time:418477ms step_avg:151.95ms step:2765/3200 train_loss:3.3420 train_time:418629ms step_avg:151.95ms step:2766/3200 train_loss:3.2387 train_time:418778ms step_avg:151.95ms step:2767/3200 
train_loss:3.3297 train_time:418928ms step_avg:151.95ms step:2768/3200 train_loss:3.4086 train_time:419078ms step_avg:151.95ms step:2769/3200 train_loss:3.2908 train_time:419228ms step_avg:151.95ms step:2770/3200 train_loss:3.3678 train_time:419377ms step_avg:151.95ms step:2771/3200 train_loss:3.3454 train_time:419528ms step_avg:151.95ms step:2772/3200 train_loss:3.7779 train_time:419679ms step_avg:151.95ms step:2773/3200 train_loss:3.2542 train_time:419829ms step_avg:151.95ms step:2774/3200 train_loss:3.3924 train_time:419980ms step_avg:151.95ms step:2775/3200 train_loss:3.4479 train_time:420130ms step_avg:151.95ms step:2776/3200 train_loss:3.4130 train_time:420281ms step_avg:151.95ms step:2777/3200 train_loss:3.4881 train_time:420429ms step_avg:151.94ms step:2778/3200 train_loss:3.4896 train_time:420579ms step_avg:151.94ms step:2779/3200 train_loss:3.3651 train_time:420729ms step_avg:151.94ms step:2780/3200 train_loss:3.2278 train_time:420879ms step_avg:151.94ms step:2781/3200 train_loss:3.3741 train_time:421030ms step_avg:151.94ms step:2782/3200 train_loss:3.3999 train_time:421180ms step_avg:151.94ms step:2783/3200 train_loss:3.2639 train_time:421330ms step_avg:151.94ms step:2784/3200 train_loss:3.3651 train_time:421480ms step_avg:151.94ms step:2785/3200 train_loss:3.4253 train_time:421630ms step_avg:151.94ms step:2786/3200 train_loss:3.3082 train_time:421780ms step_avg:151.94ms step:2787/3200 train_loss:3.4167 train_time:421930ms step_avg:151.94ms step:2788/3200 train_loss:3.3854 train_time:422081ms step_avg:151.94ms step:2789/3200 train_loss:3.3160 train_time:422231ms step_avg:151.94ms step:2790/3200 train_loss:3.4076 train_time:422383ms step_avg:151.94ms step:2791/3200 train_loss:3.3309 train_time:422532ms step_avg:151.94ms step:2792/3200 train_loss:3.2313 train_time:422685ms step_avg:151.94ms step:2793/3200 train_loss:3.3278 train_time:422835ms step_avg:151.93ms step:2794/3200 train_loss:3.3793 train_time:422986ms step_avg:151.93ms step:2795/3200 
train_loss:3.2908 train_time:423135ms step_avg:151.93ms step:2796/3200 train_loss:3.3330 train_time:423287ms step_avg:151.93ms step:2797/3200 train_loss:3.2421 train_time:423436ms step_avg:151.93ms step:2798/3200 train_loss:3.3518 train_time:423587ms step_avg:151.93ms step:2799/3200 train_loss:3.3067 train_time:423736ms step_avg:151.93ms step:2800/3200 train_loss:3.4735 train_time:423887ms step_avg:151.93ms step:2801/3200 train_loss:3.4201 train_time:424037ms step_avg:151.93ms step:2802/3200 train_loss:3.3984 train_time:424187ms step_avg:151.93ms step:2803/3200 train_loss:3.3454 train_time:424337ms step_avg:151.93ms step:2804/3200 train_loss:3.5176 train_time:424487ms step_avg:151.93ms step:2805/3200 train_loss:3.4895 train_time:424636ms step_avg:151.93ms step:2806/3200 train_loss:3.2090 train_time:424788ms step_avg:151.93ms step:2807/3200 train_loss:3.6115 train_time:424937ms step_avg:151.93ms step:2808/3200 train_loss:3.3535 train_time:425088ms step_avg:151.93ms step:2809/3200 train_loss:3.2918 train_time:425238ms step_avg:151.92ms step:2810/3200 train_loss:3.3038 train_time:425388ms step_avg:151.92ms step:2811/3200 train_loss:3.4651 train_time:425539ms step_avg:151.92ms step:2812/3200 train_loss:3.4522 train_time:425689ms step_avg:151.92ms step:2813/3200 train_loss:3.2117 train_time:425840ms step_avg:151.92ms step:2814/3200 train_loss:3.4367 train_time:425991ms step_avg:151.92ms step:2815/3200 train_loss:3.5014 train_time:426141ms step_avg:151.92ms step:2816/3200 train_loss:3.3088 train_time:426291ms step_avg:151.92ms step:2817/3200 train_loss:2.9233 train_time:426442ms step_avg:151.92ms step:2818/3200 train_loss:3.3278 train_time:426591ms step_avg:151.92ms step:2819/3200 train_loss:3.2933 train_time:426741ms step_avg:151.92ms step:2820/3200 train_loss:3.4939 train_time:426890ms step_avg:151.92ms step:2821/3200 train_loss:3.3436 train_time:427042ms step_avg:151.92ms step:2822/3200 train_loss:3.4065 train_time:427191ms step_avg:151.92ms step:2823/3200 
train_loss:3.3488 train_time:427343ms step_avg:151.92ms step:2824/3200 train_loss:3.3211 train_time:427493ms step_avg:151.92ms step:2825/3200 train_loss:3.2176 train_time:427644ms step_avg:151.92ms step:2826/3200 train_loss:3.4777 train_time:427794ms step_avg:151.92ms step:2827/3200 train_loss:3.3697 train_time:427946ms step_avg:151.92ms step:2828/3200 train_loss:3.2635 train_time:428095ms step_avg:151.91ms step:2829/3200 train_loss:3.3924 train_time:428247ms step_avg:151.91ms step:2830/3200 train_loss:3.3898 train_time:428397ms step_avg:151.91ms step:2831/3200 train_loss:3.3237 train_time:428548ms step_avg:151.91ms step:2832/3200 train_loss:3.4684 train_time:428699ms step_avg:151.91ms step:2833/3200 train_loss:3.3879 train_time:428849ms step_avg:151.91ms step:2834/3200 train_loss:3.3677 train_time:429001ms step_avg:151.91ms step:2835/3200 train_loss:3.1795 train_time:429303ms step_avg:151.97ms step:2836/3200 train_loss:3.4028 train_time:429461ms step_avg:151.97ms step:2837/3200 train_loss:3.3380 train_time:429610ms step_avg:151.97ms step:2838/3200 train_loss:3.6291 train_time:429758ms step_avg:151.97ms step:2839/3200 train_loss:3.2959 train_time:429906ms step_avg:151.96ms step:2840/3200 train_loss:3.3024 train_time:430056ms step_avg:151.96ms step:2841/3200 train_loss:3.3517 train_time:430206ms step_avg:151.96ms step:2842/3200 train_loss:3.2873 train_time:430360ms step_avg:151.96ms step:2843/3200 train_loss:3.2890 train_time:430511ms step_avg:151.96ms step:2844/3200 train_loss:3.4577 train_time:430662ms step_avg:151.96ms step:2845/3200 train_loss:3.3360 train_time:430811ms step_avg:151.96ms step:2846/3200 train_loss:3.3747 train_time:430962ms step_avg:151.96ms step:2847/3200 train_loss:3.3325 train_time:431111ms step_avg:151.96ms step:2848/3200 train_loss:3.5932 train_time:431262ms step_avg:151.96ms step:2849/3200 train_loss:3.2632 train_time:431413ms step_avg:151.96ms step:2850/3200 train_loss:3.2991 train_time:431733ms step_avg:152.02ms step:2851/3200 
train_loss:3.4002 train_time:431880ms step_avg:152.02ms step:2852/3200 train_loss:3.3720 train_time:432029ms step_avg:152.02ms step:2853/3200 train_loss:3.3331 train_time:432177ms step_avg:152.01ms step:2854/3200 train_loss:3.4073 train_time:432327ms step_avg:152.01ms step:2855/3200 train_loss:3.2323 train_time:432475ms step_avg:152.01ms step:2856/3200 train_loss:3.2568 train_time:432627ms step_avg:152.01ms step:2857/3200 train_loss:3.3517 train_time:432779ms step_avg:152.01ms step:2858/3200 train_loss:3.3430 train_time:432930ms step_avg:152.01ms step:2859/3200 train_loss:3.2464 train_time:433080ms step_avg:152.01ms step:2860/3200 train_loss:3.3346 train_time:433229ms step_avg:152.01ms step:2861/3200 train_loss:3.2953 train_time:433377ms step_avg:152.01ms step:2862/3200 train_loss:3.3318 train_time:433528ms step_avg:152.01ms step:2863/3200 train_loss:3.3763 train_time:433680ms step_avg:152.01ms step:2864/3200 train_loss:3.6439 train_time:433831ms step_avg:152.01ms step:2865/3200 train_loss:3.4392 train_time:433982ms step_avg:152.01ms step:2866/3200 train_loss:3.3411 train_time:434131ms step_avg:152.01ms step:2867/3200 train_loss:3.2033 train_time:434282ms step_avg:152.01ms step:2868/3200 train_loss:3.4251 train_time:434430ms step_avg:152.00ms step:2869/3200 train_loss:3.3807 train_time:434580ms step_avg:152.00ms step:2870/3200 train_loss:3.3347 train_time:434731ms step_avg:152.00ms step:2871/3200 train_loss:3.4756 train_time:434882ms step_avg:152.00ms step:2872/3200 train_loss:3.2413 train_time:435032ms step_avg:152.00ms step:2873/3200 train_loss:3.3168 train_time:435185ms step_avg:152.00ms step:2874/3200 train_loss:3.1906 train_time:435334ms step_avg:152.00ms step:2875/3200 train_loss:3.3319 train_time:435485ms step_avg:152.00ms step:2875/3200 val_loss:3.3340 train_time:435532ms step_avg:152.02ms step:2876/3200 train_loss:3.2549 train_time:435644ms step_avg:152.00ms step:2877/3200 train_loss:3.2429 train_time:435799ms step_avg:152.01ms step:2878/3200 
train_loss:3.3273 train_time:435949ms step_avg:152.00ms step:2879/3200 train_loss:3.4434 train_time:436096ms step_avg:152.00ms step:2880/3200 train_loss:3.3944 train_time:436246ms step_avg:152.00ms step:2881/3200 train_loss:3.3412 train_time:436394ms step_avg:152.00ms step:2882/3200 train_loss:3.3266 train_time:436545ms step_avg:152.00ms step:2883/3200 train_loss:3.4469 train_time:436697ms step_avg:152.00ms step:2884/3200 train_loss:3.2230 train_time:436848ms step_avg:152.00ms step:2885/3200 train_loss:3.2466 train_time:436997ms step_avg:152.00ms step:2886/3200 train_loss:3.2997 train_time:437147ms step_avg:152.00ms step:2887/3200 train_loss:3.2939 train_time:437295ms step_avg:152.00ms step:2888/3200 train_loss:3.3019 train_time:437444ms step_avg:152.00ms step:2889/3200 train_loss:3.3321 train_time:437594ms step_avg:152.00ms step:2890/3200 train_loss:3.5193 train_time:437746ms step_avg:152.00ms step:2891/3200 train_loss:3.3559 train_time:437897ms step_avg:151.99ms step:2892/3200 train_loss:3.1833 train_time:438050ms step_avg:152.00ms step:2893/3200 train_loss:3.1156 train_time:438198ms step_avg:151.99ms step:2894/3200 train_loss:3.2633 train_time:438350ms step_avg:151.99ms step:2895/3200 train_loss:3.1514 train_time:438498ms step_avg:151.99ms step:2896/3200 train_loss:3.3263 train_time:438650ms step_avg:151.99ms step:2897/3200 train_loss:3.4597 train_time:438799ms step_avg:151.99ms step:2898/3200 train_loss:3.2827 train_time:438950ms step_avg:151.99ms step:2899/3200 train_loss:3.3752 train_time:439099ms step_avg:151.99ms step:2900/3200 train_loss:3.2564 train_time:439251ms step_avg:151.99ms step:2901/3200 train_loss:3.4478 train_time:439399ms step_avg:151.99ms step:2902/3200 train_loss:3.4359 train_time:439551ms step_avg:151.99ms step:2903/3200 train_loss:3.4582 train_time:439699ms step_avg:151.99ms step:2904/3200 train_loss:3.1908 train_time:439852ms step_avg:151.99ms step:2905/3200 train_loss:3.3346 train_time:440002ms step_avg:151.99ms step:2906/3200 
train_loss:3.3103 train_time:440154ms step_avg:151.99ms step:2907/3200 train_loss:3.3735 train_time:440304ms step_avg:151.99ms step:2908/3200 train_loss:3.3168 train_time:440454ms step_avg:151.99ms step:2909/3200 train_loss:3.2907 train_time:440604ms step_avg:151.98ms step:2910/3200 train_loss:3.6249 train_time:440756ms step_avg:151.98ms step:2911/3200 train_loss:3.3389 train_time:440906ms step_avg:151.98ms step:2912/3200 train_loss:3.2471 train_time:441057ms step_avg:151.98ms step:2913/3200 train_loss:3.2280 train_time:441208ms step_avg:151.98ms step:2914/3200 train_loss:3.7051 train_time:441358ms step_avg:151.98ms step:2915/3200 train_loss:3.2974 train_time:441510ms step_avg:151.98ms step:2916/3200 train_loss:3.2535 train_time:441659ms step_avg:151.98ms step:2917/3200 train_loss:3.2380 train_time:441811ms step_avg:151.98ms step:2918/3200 train_loss:3.5150 train_time:441960ms step_avg:151.98ms step:2919/3200 train_loss:3.0190 train_time:442113ms step_avg:151.98ms step:2920/3200 train_loss:3.2224 train_time:442262ms step_avg:151.98ms step:2921/3200 train_loss:3.2442 train_time:442413ms step_avg:151.98ms step:2922/3200 train_loss:3.3441 train_time:442563ms step_avg:151.98ms step:2923/3200 train_loss:3.3807 train_time:442713ms step_avg:151.98ms step:2924/3200 train_loss:3.4127 train_time:442863ms step_avg:151.98ms step:2925/3200 train_loss:3.4247 train_time:443014ms step_avg:151.98ms step:2926/3200 train_loss:3.3124 train_time:443163ms step_avg:151.98ms step:2927/3200 train_loss:3.3091 train_time:443314ms step_avg:151.98ms step:2928/3200 train_loss:3.3061 train_time:443463ms step_avg:151.98ms step:2929/3200 train_loss:3.3061 train_time:443615ms step_avg:151.97ms step:2930/3200 train_loss:3.2671 train_time:443765ms step_avg:151.97ms step:2931/3200 train_loss:3.2990 train_time:443915ms step_avg:151.97ms step:2932/3200 train_loss:3.4233 train_time:444066ms step_avg:151.97ms step:2933/3200 train_loss:3.4635 train_time:444216ms step_avg:151.97ms step:2934/3200 
train_loss:3.4385 train_time:444366ms step_avg:151.97ms step:2935/3200 train_loss:3.2781 train_time:444516ms step_avg:151.97ms step:2936/3200 train_loss:3.3298 train_time:444666ms step_avg:151.97ms step:2937/3200 train_loss:3.2802 train_time:444817ms step_avg:151.97ms step:2938/3200 train_loss:3.3006 train_time:444967ms step_avg:151.97ms step:2939/3200 train_loss:3.3248 train_time:445117ms step_avg:151.97ms step:2940/3200 train_loss:3.3662 train_time:445269ms step_avg:151.97ms step:2941/3200 train_loss:3.4131 train_time:445418ms step_avg:151.97ms step:2942/3200 train_loss:3.4021 train_time:445569ms step_avg:151.97ms step:2943/3200 train_loss:3.3312 train_time:445718ms step_avg:151.97ms step:2944/3200 train_loss:3.2037 train_time:445870ms step_avg:151.97ms step:2945/3200 train_loss:3.1498 train_time:446019ms step_avg:151.97ms step:2946/3200 train_loss:3.3542 train_time:446171ms step_avg:151.97ms step:2947/3200 train_loss:3.4219 train_time:446320ms step_avg:151.96ms step:2948/3200 train_loss:3.3487 train_time:446473ms step_avg:151.96ms step:2949/3200 train_loss:3.5350 train_time:446623ms step_avg:151.96ms step:2950/3200 train_loss:3.3461 train_time:446774ms step_avg:151.96ms step:2951/3200 train_loss:3.3533 train_time:446924ms step_avg:151.96ms step:2952/3200 train_loss:3.7570 train_time:447074ms step_avg:151.96ms step:2953/3200 train_loss:3.4292 train_time:447226ms step_avg:151.96ms step:2954/3200 train_loss:3.3743 train_time:447376ms step_avg:151.96ms step:2955/3200 train_loss:3.3919 train_time:447528ms step_avg:151.96ms step:2956/3200 train_loss:3.3178 train_time:447677ms step_avg:151.96ms step:2957/3200 train_loss:3.3384 train_time:447828ms step_avg:151.96ms step:2958/3200 train_loss:3.2207 train_time:447978ms step_avg:151.96ms step:2959/3200 train_loss:3.3000 train_time:448130ms step_avg:151.96ms step:2960/3200 train_loss:3.4451 train_time:448279ms step_avg:151.96ms step:2961/3200 train_loss:3.2473 train_time:448431ms step_avg:151.96ms step:2962/3200 
train_loss:3.3727 train_time:448579ms step_avg:151.96ms step:2963/3200 train_loss:3.2415 train_time:448731ms step_avg:151.96ms step:2964/3200 train_loss:3.3029 train_time:448880ms step_avg:151.96ms step:2965/3200 train_loss:3.2847 train_time:449033ms step_avg:151.96ms step:2966/3200 train_loss:3.3844 train_time:449182ms step_avg:151.96ms step:2967/3200 train_loss:3.2690 train_time:449334ms step_avg:151.96ms step:2968/3200 train_loss:3.5094 train_time:449485ms step_avg:151.96ms step:2969/3200 train_loss:3.3623 train_time:449635ms step_avg:151.95ms step:2970/3200 train_loss:3.3805 train_time:449785ms step_avg:151.95ms step:2971/3200 train_loss:3.3579 train_time:449935ms step_avg:151.95ms step:2972/3200 train_loss:3.4312 train_time:450085ms step_avg:151.95ms step:2973/3200 train_loss:3.2616 train_time:450235ms step_avg:151.95ms step:2974/3200 train_loss:3.2707 train_time:450386ms step_avg:151.95ms step:2975/3200 train_loss:3.1936 train_time:450535ms step_avg:151.95ms step:2976/3200 train_loss:3.2622 train_time:450687ms step_avg:151.95ms step:2977/3200 train_loss:3.2547 train_time:450836ms step_avg:151.95ms step:2978/3200 train_loss:3.2795 train_time:450988ms step_avg:151.95ms step:2979/3200 train_loss:3.5617 train_time:451137ms step_avg:151.95ms step:2980/3200 train_loss:3.3643 train_time:451289ms step_avg:151.95ms step:2981/3200 train_loss:3.4033 train_time:451439ms step_avg:151.95ms step:2982/3200 train_loss:3.4207 train_time:451590ms step_avg:151.95ms step:2983/3200 train_loss:3.4903 train_time:451740ms step_avg:151.95ms step:2984/3200 train_loss:3.3024 train_time:451892ms step_avg:151.95ms step:2985/3200 train_loss:3.3951 train_time:452043ms step_avg:151.95ms step:2986/3200 train_loss:3.3990 train_time:452193ms step_avg:151.95ms step:2987/3200 train_loss:3.3450 train_time:452343ms step_avg:151.95ms step:2988/3200 train_loss:3.4709 train_time:452493ms step_avg:151.95ms step:2989/3200 train_loss:3.0577 train_time:452644ms step_avg:151.94ms step:2990/3200 
train_loss:3.4163 train_time:452794ms step_avg:151.94ms step:2991/3200 train_loss:3.3728 train_time:452945ms step_avg:151.94ms step:2992/3200 train_loss:3.3211 train_time:453094ms step_avg:151.94ms step:2993/3200 train_loss:3.2574 train_time:453245ms step_avg:151.94ms step:2994/3200 train_loss:3.3984 train_time:453395ms step_avg:151.94ms step:2995/3200 train_loss:3.2265 train_time:453546ms step_avg:151.94ms step:2996/3200 train_loss:3.2337 train_time:453696ms step_avg:151.94ms step:2997/3200 train_loss:3.3184 train_time:453847ms step_avg:151.94ms step:2998/3200 train_loss:3.2607 train_time:453996ms step_avg:151.94ms step:2999/3200 train_loss:3.3814 train_time:454149ms step_avg:151.94ms step:3000/3200 train_loss:3.2792 train_time:454298ms step_avg:151.94ms step:3000/3200 val_loss:3.3162 train_time:454345ms step_avg:151.95ms step:3001/3200 train_loss:3.2723 train_time:454455ms step_avg:151.94ms step:3002/3200 train_loss:3.2218 train_time:454608ms step_avg:151.94ms step:3003/3200 train_loss:3.2545 train_time:454758ms step_avg:151.94ms step:3004/3200 train_loss:3.3894 train_time:454906ms step_avg:151.94ms step:3005/3200 train_loss:3.7311 train_time:455055ms step_avg:151.94ms step:3006/3200 train_loss:3.2926 train_time:455203ms step_avg:151.94ms step:3007/3200 train_loss:3.3728 train_time:455354ms step_avg:151.94ms step:3008/3200 train_loss:3.1721 train_time:455507ms step_avg:151.94ms step:3009/3200 train_loss:3.4000 train_time:455660ms step_avg:151.94ms step:3010/3200 train_loss:3.2957 train_time:455808ms step_avg:151.94ms step:3011/3200 train_loss:3.3555 train_time:455959ms step_avg:151.94ms step:3012/3200 train_loss:3.3481 train_time:456107ms step_avg:151.93ms step:3013/3200 train_loss:3.2316 train_time:456256ms step_avg:151.93ms step:3014/3200 train_loss:3.4293 train_time:456407ms step_avg:151.93ms step:3015/3200 train_loss:3.3947 train_time:456559ms step_avg:151.93ms step:3016/3200 train_loss:3.2584 train_time:456710ms step_avg:151.93ms step:3017/3200 
train_loss:3.3076 train_time:456860ms step_avg:151.93ms step:3018/3200 train_loss:3.3430 train_time:457009ms step_avg:151.93ms step:3019/3200 train_loss:3.3863 train_time:457161ms step_avg:151.93ms step:3020/3200 train_loss:3.1663 train_time:457310ms step_avg:151.93ms step:3021/3200 train_loss:3.4698 train_time:457462ms step_avg:151.93ms step:3022/3200 train_loss:3.3033 train_time:457612ms step_avg:151.93ms step:3023/3200 train_loss:3.2209 train_time:457763ms step_avg:151.93ms step:3024/3200 train_loss:3.3226 train_time:458064ms step_avg:151.98ms step:3025/3200 train_loss:3.2949 train_time:458222ms step_avg:151.98ms step:3026/3200 train_loss:3.3564 train_time:458373ms step_avg:151.98ms step:3027/3200 train_loss:3.3808 train_time:458523ms step_avg:151.98ms step:3028/3200 train_loss:3.2793 train_time:458672ms step_avg:151.98ms step:3029/3200 train_loss:3.0888 train_time:458822ms step_avg:151.98ms step:3030/3200 train_loss:3.4250 train_time:458971ms step_avg:151.98ms step:3031/3200 train_loss:3.1884 train_time:459126ms step_avg:151.98ms step:3032/3200 train_loss:3.1783 train_time:459277ms step_avg:151.98ms step:3033/3200 train_loss:3.5177 train_time:459428ms step_avg:151.98ms step:3034/3200 train_loss:3.5163 train_time:459580ms step_avg:151.98ms step:3035/3200 train_loss:3.2846 train_time:459728ms step_avg:151.98ms step:3036/3200 train_loss:3.3643 train_time:459879ms step_avg:151.98ms step:3037/3200 train_loss:3.3087 train_time:460028ms step_avg:151.98ms step:3038/3200 train_loss:3.2142 train_time:460182ms step_avg:151.98ms step:3039/3200 train_loss:3.2642 train_time:460330ms step_avg:151.97ms step:3040/3200 train_loss:3.3603 train_time:460643ms step_avg:152.03ms step:3041/3200 train_loss:3.3468 train_time:460794ms step_avg:152.03ms step:3042/3200 train_loss:3.1478 train_time:460944ms step_avg:152.03ms step:3043/3200 train_loss:3.3074 train_time:461092ms step_avg:152.02ms step:3044/3200 train_loss:3.3356 train_time:461242ms step_avg:152.02ms step:3045/3200 
train_loss:3.3473 train_time:461389ms step_avg:152.02ms step:3046/3200 train_loss:3.4192 train_time:461542ms step_avg:152.02ms step:3047/3200 train_loss:3.2422 train_time:461695ms step_avg:152.02ms step:3048/3200 train_loss:3.3689 train_time:461846ms step_avg:152.02ms step:3049/3200 train_loss:3.3112 train_time:461997ms step_avg:152.02ms step:3050/3200 train_loss:3.2364 train_time:462145ms step_avg:152.02ms step:3051/3200 train_loss:3.3615 train_time:462294ms step_avg:152.02ms step:3052/3200 train_loss:3.2075 train_time:462444ms step_avg:152.02ms step:3053/3200 train_loss:3.4470 train_time:462595ms step_avg:152.02ms step:3054/3200 train_loss:3.4000 train_time:462746ms step_avg:152.02ms step:3055/3200 train_loss:3.3783 train_time:462896ms step_avg:152.02ms step:3056/3200 train_loss:3.3785 train_time:463047ms step_avg:152.02ms step:3057/3200 train_loss:3.2557 train_time:463197ms step_avg:152.02ms step:3058/3200 train_loss:3.2797 train_time:463346ms step_avg:152.02ms step:3059/3200 train_loss:3.3576 train_time:463496ms step_avg:152.02ms step:3060/3200 train_loss:3.2676 train_time:463647ms step_avg:152.02ms step:3061/3200 train_loss:3.3281 train_time:463798ms step_avg:152.02ms step:3062/3200 train_loss:3.3321 train_time:463949ms step_avg:152.01ms step:3063/3200 train_loss:3.2718 train_time:464101ms step_avg:152.01ms step:3064/3200 train_loss:3.2360 train_time:464250ms step_avg:152.01ms step:3065/3200 train_loss:3.2571 train_time:464402ms step_avg:152.01ms step:3066/3200 train_loss:3.2359 train_time:464551ms step_avg:152.01ms step:3067/3200 train_loss:3.2205 train_time:464703ms step_avg:152.01ms step:3068/3200 train_loss:3.1854 train_time:464853ms step_avg:152.01ms step:3069/3200 train_loss:3.2263 train_time:465004ms step_avg:152.01ms step:3070/3200 train_loss:3.2138 train_time:465155ms step_avg:152.01ms step:3071/3200 train_loss:3.4085 train_time:465305ms step_avg:152.01ms step:3072/3200 train_loss:3.3347 train_time:465455ms step_avg:152.01ms step:3073/3200 
train_loss:3.3780 train_time:465605ms step_avg:152.01ms step:3074/3200 train_loss:3.3650 train_time:465755ms step_avg:152.01ms step:3075/3200 train_loss:3.3094 train_time:465906ms step_avg:152.01ms step:3076/3200 train_loss:3.3614 train_time:466056ms step_avg:152.01ms step:3077/3200 train_loss:3.4215 train_time:466206ms step_avg:152.01ms step:3078/3200 train_loss:3.2165 train_time:466355ms step_avg:152.01ms step:3079/3200 train_loss:3.7571 train_time:466506ms step_avg:152.01ms step:3080/3200 train_loss:3.3009 train_time:466657ms step_avg:152.01ms step:3081/3200 train_loss:3.2653 train_time:466806ms step_avg:152.00ms step:3082/3200 train_loss:3.4208 train_time:466957ms step_avg:152.00ms step:3083/3200 train_loss:3.2224 train_time:467106ms step_avg:152.00ms step:3084/3200 train_loss:3.2568 train_time:467257ms step_avg:152.00ms step:3085/3200 train_loss:3.3146 train_time:467408ms step_avg:152.00ms step:3086/3200 train_loss:3.4074 train_time:467559ms step_avg:152.00ms step:3087/3200 train_loss:3.3124 train_time:467708ms step_avg:152.00ms step:3088/3200 train_loss:3.2222 train_time:467860ms step_avg:152.00ms step:3089/3200 train_loss:3.3745 train_time:468009ms step_avg:152.00ms step:3090/3200 train_loss:3.2373 train_time:468161ms step_avg:152.00ms step:3091/3200 train_loss:3.4997 train_time:468311ms step_avg:152.00ms step:3092/3200 train_loss:4.0691 train_time:468463ms step_avg:152.00ms step:3093/3200 train_loss:3.3411 train_time:468613ms step_avg:152.00ms step:3094/3200 train_loss:3.2256 train_time:468764ms step_avg:152.00ms step:3095/3200 train_loss:3.1872 train_time:468914ms step_avg:152.00ms step:3096/3200 train_loss:3.3520 train_time:469064ms step_avg:152.00ms step:3097/3200 train_loss:3.4881 train_time:469215ms step_avg:152.00ms step:3098/3200 train_loss:3.2540 train_time:469364ms step_avg:152.00ms step:3099/3200 train_loss:3.2885 train_time:469516ms step_avg:152.00ms step:3100/3200 train_loss:3.4619 train_time:469665ms step_avg:152.00ms step:3101/3200 
train_loss:3.3713 train_time:469815ms step_avg:151.99ms step:3102/3200 train_loss:3.3606 train_time:469966ms step_avg:151.99ms step:3103/3200 train_loss:3.2725 train_time:470116ms step_avg:151.99ms step:3104/3200 train_loss:3.5304 train_time:470266ms step_avg:151.99ms step:3105/3200 train_loss:3.3509 train_time:470418ms step_avg:151.99ms step:3106/3200 train_loss:3.2026 train_time:470567ms step_avg:151.99ms step:3107/3200 train_loss:3.2313 train_time:470719ms step_avg:151.99ms step:3108/3200 train_loss:3.1885 train_time:470868ms step_avg:151.99ms step:3109/3200 train_loss:3.4166 train_time:471019ms step_avg:151.99ms step:3110/3200 train_loss:3.2988 train_time:471168ms step_avg:151.99ms step:3111/3200 train_loss:3.3293 train_time:471321ms step_avg:151.99ms step:3112/3200 train_loss:3.3140 train_time:471471ms step_avg:151.99ms step:3113/3200 train_loss:3.3633 train_time:471624ms step_avg:151.99ms step:3114/3200 train_loss:3.3226 train_time:471773ms step_avg:151.99ms step:3115/3200 train_loss:3.3196 train_time:471924ms step_avg:151.99ms step:3116/3200 train_loss:3.3595 train_time:472075ms step_avg:151.99ms step:3117/3200 train_loss:3.2051 train_time:472225ms step_avg:151.99ms step:3118/3200 train_loss:3.2324 train_time:472376ms step_avg:151.99ms step:3119/3200 train_loss:3.4046 train_time:472525ms step_avg:151.99ms step:3120/3200 train_loss:3.3931 train_time:472675ms step_avg:151.99ms step:3121/3200 train_loss:3.1768 train_time:472826ms step_avg:151.99ms step:3122/3200 train_loss:3.3762 train_time:472975ms step_avg:151.98ms step:3123/3200 train_loss:3.4349 train_time:473125ms step_avg:151.98ms step:3124/3200 train_loss:3.4003 train_time:473276ms step_avg:151.98ms step:3125/3200 train_loss:3.1967 train_time:473426ms step_avg:151.98ms step:3125/3200 val_loss:3.3015 train_time:473472ms step_avg:152.00ms step:3126/3200 train_loss:3.2773 train_time:473582ms step_avg:151.98ms step:3127/3200 train_loss:3.3092 train_time:473735ms step_avg:151.98ms step:3128/3200 
train_loss:3.4000 train_time:473885ms step_avg:151.98ms step:3129/3200 train_loss:3.4744 train_time:474032ms step_avg:151.98ms step:3130/3200 train_loss:3.1747 train_time:474182ms step_avg:151.98ms step:3131/3200 train_loss:3.3403 train_time:474330ms step_avg:151.98ms step:3132/3200 train_loss:3.3410 train_time:474481ms step_avg:151.98ms step:3133/3200 train_loss:3.3686 train_time:474634ms step_avg:151.98ms step:3134/3200 train_loss:3.2524 train_time:474786ms step_avg:151.98ms step:3135/3200 train_loss:3.3756 train_time:474936ms step_avg:151.98ms step:3136/3200 train_loss:3.2857 train_time:475087ms step_avg:151.98ms step:3137/3200 train_loss:3.3581 train_time:475236ms step_avg:151.98ms step:3138/3200 train_loss:3.5412 train_time:475387ms step_avg:151.98ms step:3139/3200 train_loss:3.5211 train_time:475538ms step_avg:151.98ms step:3140/3200 train_loss:3.2833 train_time:475690ms step_avg:151.98ms step:3141/3200 train_loss:3.3018 train_time:475840ms step_avg:151.98ms step:3142/3200 train_loss:3.2262 train_time:475990ms step_avg:151.98ms step:3143/3200 train_loss:3.3223 train_time:476138ms step_avg:151.98ms step:3144/3200 train_loss:3.1213 train_time:476288ms step_avg:151.97ms step:3145/3200 train_loss:3.3625 train_time:476438ms step_avg:151.97ms step:3146/3200 train_loss:3.2727 train_time:476589ms step_avg:151.97ms step:3147/3200 train_loss:3.2887 train_time:476742ms step_avg:151.97ms step:3148/3200 train_loss:3.4624 train_time:476891ms step_avg:151.97ms step:3149/3200 train_loss:3.5451 train_time:477043ms step_avg:151.97ms step:3150/3200 train_loss:3.4254 train_time:477192ms step_avg:151.97ms step:3151/3200 train_loss:3.2264 train_time:477343ms step_avg:151.97ms step:3152/3200 train_loss:3.2798 train_time:477492ms step_avg:151.97ms step:3153/3200 train_loss:3.2529 train_time:477642ms step_avg:151.97ms step:3154/3200 train_loss:3.3742 train_time:477794ms step_avg:151.97ms step:3155/3200 train_loss:3.1895 train_time:477946ms step_avg:151.97ms step:3156/3200 
train_loss:3.3215 train_time:478095ms step_avg:151.97ms step:3157/3200 train_loss:3.2714 train_time:478248ms step_avg:151.97ms step:3158/3200 train_loss:3.3983 train_time:478397ms step_avg:151.97ms step:3159/3200 train_loss:3.4523 train_time:478549ms step_avg:151.97ms step:3160/3200 train_loss:3.2978 train_time:478699ms step_avg:151.97ms step:3161/3200 train_loss:3.3640 train_time:478851ms step_avg:151.97ms step:3162/3200 train_loss:3.4491 train_time:479002ms step_avg:151.97ms step:3163/3200 train_loss:3.3474 train_time:479151ms step_avg:151.97ms step:3164/3200 train_loss:3.3970 train_time:479303ms step_avg:151.97ms step:3165/3200 train_loss:3.2241 train_time:479453ms step_avg:151.97ms step:3166/3200 train_loss:3.2092 train_time:479605ms step_avg:151.97ms step:3167/3200 train_loss:3.2490 train_time:479754ms step_avg:151.97ms step:3168/3200 train_loss:3.0694 train_time:479906ms step_avg:151.97ms step:3169/3200 train_loss:3.2396 train_time:480054ms step_avg:151.96ms step:3170/3200 train_loss:3.3745 train_time:480206ms step_avg:151.96ms step:3171/3200 train_loss:3.3954 train_time:480355ms step_avg:151.96ms step:3172/3200 train_loss:3.3691 train_time:480508ms step_avg:151.96ms step:3173/3200 train_loss:3.3460 train_time:480658ms step_avg:151.96ms step:3174/3200 train_loss:3.3098 train_time:480809ms step_avg:151.96ms step:3175/3200 train_loss:3.3070 train_time:480960ms step_avg:151.96ms step:3176/3200 train_loss:3.2973 train_time:481111ms step_avg:151.96ms step:3177/3200 train_loss:3.2402 train_time:481261ms step_avg:151.96ms step:3178/3200 train_loss:3.3598 train_time:481410ms step_avg:151.96ms step:3179/3200 train_loss:3.4504 train_time:481560ms step_avg:151.96ms step:3180/3200 train_loss:3.2815 train_time:481712ms step_avg:151.96ms step:3181/3200 train_loss:3.2762 train_time:481862ms step_avg:151.96ms step:3182/3200 train_loss:3.3158 train_time:482013ms step_avg:151.96ms step:3183/3200 train_loss:3.4157 train_time:482163ms step_avg:151.96ms step:3184/3200 
train_loss:3.4353 train_time:482316ms step_avg:151.96ms step:3185/3200 train_loss:3.3314 train_time:482464ms step_avg:151.96ms step:3186/3200 train_loss:3.4011 train_time:482613ms step_avg:151.96ms step:3187/3200 train_loss:3.3969 train_time:482765ms step_avg:151.96ms step:3188/3200 train_loss:3.1894 train_time:482914ms step_avg:151.96ms step:3189/3200 train_loss:3.2801 train_time:483067ms step_avg:151.96ms step:3190/3200 train_loss:3.3032 train_time:483217ms step_avg:151.96ms step:3191/3200 train_loss:3.3260 train_time:483369ms step_avg:151.95ms step:3192/3200 train_loss:3.2876 train_time:483518ms step_avg:151.95ms step:3193/3200 train_loss:3.2139 train_time:483669ms step_avg:151.95ms step:3194/3200 train_loss:4.2302 train_time:483818ms step_avg:151.95ms step:3195/3200 train_loss:3.3234 train_time:483969ms step_avg:151.95ms step:3196/3200 train_loss:3.1258 train_time:484118ms step_avg:151.95ms step:3197/3200 train_loss:3.2700 train_time:484270ms step_avg:151.95ms step:3198/3200 train_loss:3.1390 train_time:484419ms step_avg:151.95ms step:3199/3200 train_loss:3.2683 train_time:484571ms step_avg:151.95ms step:3200/3200 train_loss:3.2074 train_time:484721ms step_avg:151.95ms step:3200/3200 val_loss:3.2972 train_time:484767ms step_avg:151.96ms