I’m training my model but the prediction is being broadcasted over the whole tensor
class CNN(nn.Module):
def __init__(self):
super(CNN, self).__init__()
self.conv1 = nn.Conv2d(1, 6, kernel_size=4, padding=1)
self.conv2 = nn.Conv2d(6, 12, kernel_size=4)
#self.conv2_drop = nn.Dropout2d()
#self.fc1 = nn.Linear(32* 193 * 7, args.output_dim* 2)
#self.fc2 = nn.Linear( args.output_dim* 2, args.output_dim)""
def forward(self, x):
x = F.relu(F.max_pool2d(self.conv1(x), 2))
x = F.relu(F.max_pool2d(self.conv2(x), 2))
#x = x.view(-1, 1369720)
#x = F.relu(self.fc1(x))
#x = F.dropout(x,p = 0.4, training=self.training)
#x = self.fc2(x)
#return F.log_softmax(x, dim=1)
return x
class Combine(nn.Module):
def __init__(self):
super(Combine, self).__init__()
self.cnn = CNN()
self.rnn = nn.LSTM(
input_size=56856,
hidden_size=args.unit_dim,
num_layers=args.layer_dim,
batch_first=True)
self.linear = nn.Linear(args.unit_dim,args.output_dim)
def forward(self, x):
#print(x.size())
batch_size, C, H, W = x.size()
timesteps = W
c_in = x.view(batch_size , C, H, W)
c_out = self.cnn(c_in)
batch_size, C, H, W = c_out.size()
timesteps = W
#print(c_out.shape)
r_in = c_out.view(batch_size,timesteps, -1)
# Initialize hidden state with zeros
h0 = torch.zeros(args.layer_dim, r_in.size(0), args.unit_dim).requires_grad_().cuda()
# Initialize cell state
c0 = torch.zeros(args.layer_dim, x.size(0), args.unit_dim).requires_grad_().cuda()
r_out, (h_n, h_c) = self.rnn(r_in, (h0.detach(), c0.detach()))
r_out2 = self.linear(r_out[:, -1, :])
return F.log_softmax(r_out2, dim=1)
#return r_out2
model = Combine()
if args.cuda:
model.cuda()
optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum)
# Cross Entropy Loss
criterion = nn.CrossEntropyLoss()
#optimizer = torch.optim.Adam(model.parameters(), lr = args.lr)
def train(epoch):
model.train()
correct = 0
total_train = 0
correct_train = 0
for batch_idx, (data, target,length) in enumerate(train_loader):
data = np.expand_dims(data, axis=1)
data = torch.FloatTensor(data)
if args.cuda:
data, target = data.cuda(), target.cuda()
data, target = data, target
optimizer.zero_grad()
output = model(data)
loss = F.nll_loss(output, torch.max(target.data, 1)[1])
loss.backward()
optimizer.step()
pred = output.data.max(
1, keepdim=True)[1] # get the index of the max log-probability
print('prediction: ',pred)
print('target: ',target.data.max(1, keepdim=True )[1])
correct += (pred.cpu() == torch.max(target.data, 1)[1].cpu()).sum()
if batch_idx % args.log_interval == 0:
print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
epoch, batch_idx * len(data), len(train_loader.dataset),
100. * batch_idx / len(train_loader), loss.item()))
def test():
model.eval()
test_loss = 0
correct = 0
total = 0
for data, target, length in validation_loader:
data = np.expand_dims(data, axis=1)
data = torch.FloatTensor(data)
print(target.size())
if args.cuda:
data, target = data.cuda(), target.cuda()
data, target = data, target
output = model(data)
test_loss += F.nll_loss(
output, torch.max(target.data, 1)[1], size_average=False).item() # sum up batch loss
pred = output.data.max(
1, keepdim=True)[1] # get the index of the max log-probability
total += target.size(0)
correct += (pred.cpu() == target.data.max(
1, keepdim=True)[1] .cpu()).sum()
print(pred)
print( torch.max(target.data, 1)[1])
total = target.size(0)
test_loss /= len(validation_loader.dataset)
print(
'\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
test_loss, correct, total,
100. * correct / total))
torch.save({
'drive/My Drive/epoch': args.epochs,
'drive/My Drive/model_state_dict': model.state_dict(),
'drive/My Drive/optimizer_state_dict': optimizer.state_dict(),
'drive/My Drive/loss': test_loss,
}, 'drive/My Drive/weights.pth')
for epoch in range(1, args.epochs + 1):
train(epoch)
test()
and here’s the output
prediction: tensor([[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4]], device='cuda:0')
target: tensor([[ 5],
[ 6],
[14],
[ 3],
[ 6],
[ 9],
[10],
[12],
[ 3],
[12],
[12],
[12],
[ 6],
[ 6],
[ 5],
[10],
[ 3],
[ 1],
[13],
[ 5],
[11],
[ 6],
[ 2],
[ 1],
[ 0],
[ 5],
[ 5],
[ 0],
[ 6]], device='cuda:0')
Train Epoch: 1 [0/148 (0%)] Loss: 2.725649
prediction: tensor([[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4]], device='cuda:0')
target: tensor([[ 7],
[ 5],
[ 1],
[ 9],
[13],
[11],
[ 0],
[ 9],
[ 4],
[ 3],
[10],
[ 1],
[10],
[ 2],
[12],
[ 1],
[ 9],
[ 0],
[10],
[ 8],
[13],
[ 7],
[ 8],
[ 9],
[ 4],
[ 9],
[13],
[ 7],
[11]], device='cuda:0')
Train Epoch: 1 [29/148 (20%)] Loss: 2.724051
prediction: tensor([[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4]], device='cuda:0')
target: tensor([[ 7],
[ 9],
[ 5],
[ 3],
[ 7],
[ 0],
[13],
[13],
[ 2],
[ 7],
[14],
[ 2],
[11],
[ 7],
[14],
[14],
[13],
[ 2],
[ 8],
[ 9],
[ 4],
[11],
[ 0],
[ 4],
[12],
[ 6],
[14],
[13],
[ 2]], device='cuda:0')
Train Epoch: 1 [58/148 (40%)] Loss: 2.707235
prediction: tensor([[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4],
[4]], device='cuda:0')
target: tensor([[ 2],
[14],
[ 8],
[11],
[ 6],
[ 7],
[ 4],
[14],
[ 8],
[ 9],
[12],
[ 2],
[ 3],
[ 2],
[ 3],
[ 1],
[10],
[12],
[ 0],
[ 3],
[12],
[10],
[10],
[ 1],
[ 4],
[13],
[ 8],
[ 8],
[12]], device='cuda:0')
Train Epoch: 1 [87/148 (60%)] Loss: 2.686659
can any one please tell me what’s wrong with my code