I have been using PyTorch for about two weeks now (mainly for the computing courses I am taking), but I am not able to debug and figure out why my network is not learning.

If I flatten both the target and the prediction, I get an error. If I convert both to one-hot, I get a "multi-target not supported" error. Now I am getting a tensor grad error, which I am finding difficult to debug because if I work around it by setting `loss_tensor.requires_grad = True`, I get a network that doesn't minimise the loss at all. I am not sure what I am doing wrong here.
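To make the failure modes concrete, here is roughly what each attempt looked like, simplified with dummy tensors rather than my real data (so the exact shapes are illustrative):

```
import torch
import torch.nn as nn

criterion = nn.CrossEntropyLoss()
logits = torch.randn(32, 10, requires_grad=True)   # pretend model output: batch of 32, 10 classes
labels = torch.randint(0, 10, (32,))               # integer class indices

# attempt 1: flatten both prediction and target -> errors out
# criterion(logits.flatten(), labels.flatten())

# attempt 2: one-hot target -> "multi-target not supported" on my PyTorch version
# one_hot = torch.nn.functional.one_hot(labels, num_classes=10)
# criterion(logits, one_hot)

# attempt 3: a loss built from a tensor that went through numpy -> grad error
detached = torch.from_numpy(logits.detach().numpy())
loss = criterion(detached, labels)
# loss.backward()  # fails: this tensor is no longer connected to the graph
```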

Also, I have a bit of a background in Keras, and I am finding PyTorch quite difficult, although I do believe PyTorch will help me in the long run since it gives me fine-grained control.
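For reference, this is roughly how I would set up the same classifier in Keras, where the loss wiring is handled for me (written from memory, so details may be slightly off):

```
from keras.models import Sequential
from keras.layers import Dense

# rough Keras equivalent of the PyTorch model below
keras_model = Sequential([
    Dense(256, activation="relu", input_shape=(32 * 32,)),
    Dense(10, activation="softmax"),
])
keras_model.compile(optimizer="sgd",
                    loss="sparse_categorical_crossentropy",  # integer labels, no one-hot needed
                    metrics=["accuracy"])
# keras_model.fit(X_train, y_train, batch_size=32, epochs=10)
```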

```
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
from torch import optim
import scipy.io
import numpy as np
import matplotlib.pyplot as plt
from keras.datasets import cifar10

def oneHotEncode(labels1D, num_classes):
    # clip out-of-range labels, then scatter 1s into an (N, num_classes) matrix
    labels1D[labels1D < 0] = 0
    labels1D[labels1D >= num_classes] = num_classes - 1
    labels2D = np.zeros([labels1D.shape[0], num_classes])
    np.put(labels2D, labels1D + num_classes * np.arange(labels1D.shape[0]), 1)
    return labels2D

#(x_train_keras, y_train_keras), (x_test_keras, y_test_keras) = cifar100.load_data(label_mode="fine")
(x_train_keras, y_train_keras), (x_test_keras, y_test_keras) = cifar10.load_data()
x_train_keras, x_test_keras = np.mean(x_train_keras, axis=3), np.mean(x_test_keras, axis=3)
X_train_data, y_train_data = x_train_keras.reshape(-1, 32 * 32), y_train_keras
X_valid_data, y_valid_data = x_test_keras.reshape(-1, 32 * 32), y_test_keras
x_train_keras, x_test_keras = x_train_keras / 255.0, x_test_keras / 255.0
print(np.min(x_train_keras), np.max(x_train_keras))

n_epochs = 10
batch_size = 32
lr = 0.1
num_classes = 10

# shuffle features and labels together
s = np.hstack([X_train_data, y_train_data])
np.random.shuffle(s)
X_train = s[:, :-y_train_data.shape[1]]
y_train = s[:, X_train_data.shape[1]:(X_train_data.shape[1] + y_train_data.shape[1])]

model = nn.Sequential(nn.Linear(X_train.shape[1], 256),
                      nn.ReLU(),
                      nn.Linear(256, y_train.shape[1]))
criterion = nn.CrossEntropyLoss()
optimiser = optim.SGD(model.parameters(), lr=lr)

lossList, accuracyList = [list(), list()]
for eachEpoch in range(n_epochs):
    accuracy, loss = [0, 0]
    for eachBatch_i in range(int(X_train.shape[0] / batch_size)):
        X_train_ = X_train[eachBatch_i * batch_size:(eachBatch_i + 1) * batch_size, :]
        y_train_ = y_train[eachBatch_i * batch_size:(eachBatch_i + 1) * batch_size, :]
        X_train1 = torch.from_numpy(X_train_).float()
        y_train1 = torch.from_numpy(y_train_.flatten()).long()
        y_pred = model(X_train1)
        y_pred1 = y_pred.data.numpy().astype("int")
        y_pred2 = torch.from_numpy(oneHotEncode(y_pred1, num_classes))
        loss_tensor = criterion(y_pred2, y_train1.flatten())
        #loss_tensor.requires_grad = True  # un-commenting this makes backward() run, but the loss never goes down
        loss_ = loss_tensor.item()
        optimiser.zero_grad()
        loss_tensor.backward()  # <- the tensor grad error is raised here
        #optimiser.backward()
        optimiser.step()
        #print(y_pred.data.numpy().shape, y_train_.shape)
        accuracy = accuracy + np.sum(y_pred.data.numpy().flatten() == y_train_.flatten())
        loss = loss + loss_
    accuracy = accuracy / X_train.shape[0]
    loss = loss / X_train.shape[0]
    accuracyList.append(accuracy)
    lossList.append(loss)
    print("Epoch = " + str(eachEpoch), "Loss = " + str(loss), "Accuracy = " + str(accuracy))

plt.figure()
plt.xlabel("Epochs")
plt.ylabel("Accuracy")
plt.title("Accuracy vs Epochs")
plt.plot(accuracyList)
plt.show()

plt.figure()
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.title("Loss vs Epochs")
plt.plot(lossList)
plt.show()
```
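From the docs, my understanding is that `nn.CrossEntropyLoss` expects the raw logits from the model plus integer class indices as the target, so I would have expected the inner batch step to look like the sketch below (using the same `model`, `criterion`, `optimiser`, and batch slices as above), with no numpy round-trip or one-hot conversion at all. Is this the right mental model, and if so, why does my version break?

```
# what I *think* the batch step should look like (untested sketch)
X_batch = torch.from_numpy(X_train_).float()
y_batch = torch.from_numpy(y_train_.flatten()).long()  # shape (batch,), class indices

logits = model(X_batch)                    # shape (batch, num_classes), stays in the graph
loss_tensor = criterion(logits, y_batch)   # CrossEntropyLoss on raw logits

optimiser.zero_grad()
loss_tensor.backward()                     # gradients flow back through `model`
optimiser.step()

# accuracy from the argmax of the logits rather than comparing raw outputs
accuracy += (logits.argmax(dim=1) == y_batch).sum().item()
```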

Thanks in advance!