Hello,
The network I am training works fine; however, when I try to log the norms or distributions of the gradients, it blows up toward the end of training. I tested different things related to the GPU memory and my code, but could not find the reason. Here are a couple of code snippets I tried adding:
(the register_hook one is extremely slow)
(I am not 100% sure that these snippets are the cause; however, the problem occurs consistently when I add these lines)
# Log the mean squared value of every parameter of N, and of its gradient,
# as TensorBoard scalars for the current epoch.
# no_grad: these reductions are diagnostics only, so autograd should not
# record them (the parameters themselves require grad).
with torch.no_grad():
    for name, param in N.named_parameters():
        cur_w_norm = sq_tensor_norm(param)
        cur_w_name = 'weights/N_' + name + '_norm'
        writer.add_scalar(cur_w_name, cur_w_norm.item(), epoch)

        # Parameters that have not received a gradient (frozen, unused, or
        # logged before the first backward pass) have grad == None; there is
        # nothing to log for them and sq_tensor_norm(None) would raise.
        if param.grad is None:
            continue
        cur_g_norm = sq_tensor_norm(param.grad)
        cur_g_name = 'grads/N_' + name + '_norm'
        writer.add_scalar(cur_g_name, cur_g_norm.item(), epoch)
def sq_tensor_norm(M):
    """Return the mean of the squared entries of tensor ``M``.

    This is the squared L2 norm divided by the number of elements; no square
    root is taken.

    Args:
        M: a ``torch.Tensor`` of any shape, including 0-dim (scalar) tensors.

    Returns:
        A 0-dim tensor holding ``sum(M ** 2) / M.numel()``.
    """
    # numel() is the product of all dimensions and is 1 for a 0-dim tensor,
    # where reducing an empty size list without an initial value would raise.
    return torch.sum(M ** 2) / M.numel()  # ** 0.5
# Render a 50-bin density histogram of each parameter's gradient and send it
# to the writer as a figure.
for name, param in N.named_parameters():
    # Parameters without a gradient have nothing to plot.
    if param.grad is None:
        continue
    # detach() guarantees .numpy() works even if the gradient tracks history.
    grad_array = param.grad.detach().cpu().numpy().flatten()

    # Draw on an explicitly created figure instead of relying on pyplot's
    # implicit "current figure" happening to be figure number 1.
    fig, ax = plt.subplots()
    # `normed` was removed from hist() in newer Matplotlib releases;
    # `density` is the replacement keyword with the same meaning.
    ax.hist(grad_array, 50, facecolor='g', density=True, alpha=0.75)
    ax.set_xlabel('grads ' + name)
    ax.set_title('Histogram of grads for ' + name)
    ax.grid(True)

    writer.add_figure('grad_histograms/histogram_grads_' + name, fig,
                      global_step=None, close=True, walltime=None)
    # Make sure no figure stays alive between iterations.
    plt.close('all')
# Write a 20-bin histogram of every parameter of N for the current epoch,
# with autograd recording disabled while the values are copied out.
# NOTE(review): the tag starts with 'grads/' but the data logged is the
# parameter itself, not param.grad -- confirm which one is intended.
with torch.no_grad():
    for layer_name, weights in N.named_parameters():
        tag = 'grads/' + 'N_' + layer_name
        values = weights.clone().cpu().numpy()
        writer.add_histogram(tag, values, epoch, bins=20)
# Write a 20-bin histogram of every parameter of N for the current epoch,
# using a detached copy so the values can be converted to a NumPy array.
# NOTE(review): the tag starts with 'grads/' but the data logged is the
# parameter itself, not param.grad -- confirm which one is intended.
for layer_name, weights in N.named_parameters():
    tag = 'grads/' + 'N_' + layer_name
    snapshot = weights.clone().detach()
    writer.add_histogram(tag, snapshot.cpu().numpy(), epoch, bins=20)
# Attach a histogram-logging gradient hook to every trainable parameter of N.
# Every register_hook call adds one more hook, so this loop is meant to run
# once (e.g. before training), not once per epoch or per step.
# The returned handles are kept so the hooks can be detached later with
# `for h in grad_hook_handles: h.remove()`.
grad_hook_handles = []
for name, param in N.named_parameters():
    # register_hook raises on tensors that do not require grad.
    if not param.requires_grad:
        continue
    F_cur = per_layer_grad_histograms(writer, name)
    grad_hook_handles.append(param.register_hook(F_cur))
def per_layer_grad_histograms(writer, name):
    """Build a one-argument gradient callback bound to ``writer`` and ``name``.

    Args:
        writer: object forwarded unchanged to ``per_layer_grad_hist``.
        name: label forwarded unchanged to ``per_layer_grad_hist``.

    Returns:
        A callable taking ``grad`` that returns
        ``per_layer_grad_hist(writer, grad, name)``.
    """
    def _hook(grad):
        return per_layer_grad_hist(writer, grad, name)

    return _hook
def per_layer_grad_hist(writer, grad, name):
    """Plot a density histogram of ``grad`` and send it to ``writer`` as a figure.

    Args:
        writer: object exposing ``add_figure(tag, figure, global_step=...,
            close=..., walltime=...)``.
        grad: gradient tensor; it is detached, copied to the CPU and flattened.
        name: label used in the axis label, the title and the figure tag.

    Returns:
        None. Nothing is returned, so the incoming ``grad`` is not replaced
        when this function is used as a tensor hook.
    """
    # detach() guarantees .numpy() works even if the gradient tracks history.
    grad_array = grad.detach().cpu().numpy().flatten()

    # Draw on an explicitly created figure instead of relying on pyplot's
    # implicit "current figure" happening to be figure number 1.
    fig, ax = plt.subplots()
    # `normed` was removed from hist() in newer Matplotlib releases;
    # `density` is the replacement keyword with the same meaning.
    ax.hist(grad_array, 50, facecolor='g', density=True, alpha=0.75)
    ax.set_xlabel('grads ' + name)
    ax.set_title('Histogram of grads for ' + name)
    ax.grid(True)

    writer.add_figure('grads/histogram_grads_' + name, fig,
                      global_step=None, close=True, walltime=None)
    # Make sure no figure stays alive after the call.
    plt.close('all')