When I run the code below, the last line - print(test_output[0]) - shows that the model returns all nan values. Also, when I added a print statement at the start of the forward method to print out a weight - print("preforward w: ", w[0][0][0]) - the 4th training batch throws RuntimeError: Overflow when unpacking long.

My main problem is that I don't know where the nan values come from. Any ideas?
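To make it easier to see where things go wrong, a check along these lines could be called right after optimizer.step() (a sketch, not part of the run described above; torch.isnan and torch.isinf are standard PyTorch calls):

def check_finite(model, step):
    # Report the first batch at which any parameter stops being finite.
    for name, p in model.named_parameters():
        if torch.isnan(p).any() or torch.isinf(p).any():
            print("step", step, ": parameter", name, "contains nan/inf")

Here is the full code: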
import torch
import torchvision
import torch.nn as nn
import numpy as np
import torchvision.transforms as transforms
from torchvision import datasets
torch.manual_seed(1)
class MnistModel(nn.Module):
    def __init__(self, w, b):
        super(MnistModel, self).__init__()
        self.w = torch.nn.Parameter(w)    # (1, 784)
        self.b = torch.nn.Parameter(b)    # (784, 1), not used in forward
        self.w2 = torch.nn.Parameter(torch.rand(1000, 784, 10))
        self.w3 = torch.nn.Parameter(torch.rand(1000, 784, 1))

    def forward(self, x):
        # print("preforward w: ", w[0][0][0])
        layer = torch.mul(x, self.w)       # (1000, 784, 1) * (1, 784) broadcasts to (1000, 784, 784)
        layer = torch.bmm(layer, self.w2)  # (1000, 784, 10)
        layer = layer.view(1000, 10, 784)
        layer = torch.bmm(layer, self.w3)  # (1000, 10, 1)
        output = layer.view(1000, 10)
        return output

batch_size = 1000
classes = 10
train_data = datasets.MNIST('data', train=True,
                            download=True,
                            transform=transforms.ToTensor())
train_loader = torch.utils.data.DataLoader(train_data,
                                           batch_size=batch_size,
                                           shuffle=False)
test_data = datasets.MNIST('data', train=False, transform=transforms.ToTensor())
test_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size)

w = torch.randn(1, 784, requires_grad=True)
b = torch.randn(784, 1, requires_grad=True)
learning_rate = 0.001
model = MnistModel(w, b)
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
rows = np.array(range(batch_size))
for i, (raw_data, raw_target) in enumerate(train_loader):
    data = raw_data.view((batch_size, 784, 1))  # (1000, 1, 28, 28) -> (1000, 784, 1)
    logits = model(data)

    # Build one-hot targets with plain numpy indexing: 1 at (row, label), 0 elsewhere.
    one_hot = np.zeros((batch_size, classes), dtype=np.float32)
    one_hot[rows, raw_target.numpy()] = 1
    target = torch.from_numpy(one_hot)

    loss = criterion(logits, target)

    # Backward and optimize
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if i == 3:
        break

for i, (raw_data, raw_target) in enumerate(test_loader):
    if i == 1:
        break
    data = raw_data.view((batch_size, 784, 1))
    test_output = model(data)

print(test_output[0])
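As an aside, the one-hot target construction in the loop could presumably be done directly in torch; a minimal sketch, assuming a PyTorch version that provides torch.nn.functional.one_hot:

import torch.nn.functional as F

# raw_target is a LongTensor of shape (batch_size,); one_hot returns a
# (batch_size, classes) LongTensor with a 1 at each label position.
target = F.one_hot(raw_target, num_classes=classes).float()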