I wrote the following code to train my model:
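# model, optimizer, criterion (a triplet loss taking anchor/positive/negative embeddings),
# train_loader, test_loader, device, epochs, PATH and PATH2 are assumed to be defined
# earlier in the notebook, along with the usual imports (torch, numpy as np,
# pandas as pd, matplotlib.pyplot as plt, tqdm, time, xgboost.XGBClassifier).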
model.train()
min_loss = 1.0   # checkpoint threshold: lowest mean epoch loss seen so far (starts at 1.0)
loss_all = []
for epoch in tqdm(range(epochs), desc="Epochs"):
    running_loss = []
    for step, (anchor_img, positive_img, negative_img, anchor_label) in enumerate(tqdm(train_loader, desc="Training", leave=False)):
        anchor_img = anchor_img.to(device)
        positive_img = positive_img.to(device)
        negative_img = negative_img.to(device)

        optimizer.zero_grad()
        anchor_out = model(anchor_img)
        positive_out = model(positive_img)
        negative_out = model(negative_img)

        loss = criterion(anchor_out, positive_out, negative_out)
        loss.backward()
        optimizer.step()

        running_loss.append(loss.cpu().detach().numpy())

    epoch_loss = np.mean(running_loss)
    print("Epoch: {}/{} - Loss: {:.4f}".format(epoch + 1, epochs, epoch_loss))
    loss_all.append(epoch_loss)

    # checkpoint whenever the mean epoch loss drops to a new low
    if min_loss >= epoch_loss:
        min_loss = epoch_loss
        torch.save({"model_state_dict": model.state_dict(),
                    "optimizer_state_dict": optimizer.state_dict()},
                   PATH2 + "trained_model_random.pt")
        sm = torch.jit.script(model)
        sm.save(PATH2 + "trained_model_random_cpp.pt")
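
# Training done: extract embeddings for the whole training set and fit an XGBoost classifier on them.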
train_results = []
labels = []
model.eval()
with torch.no_grad():
    for img, _, _, label in tqdm(train_loader):
        train_results.append(model(img.to(device)).cpu().numpy())
        labels.append(label)

train_results = np.concatenate(train_results)
labels = np.concatenate(labels)

tree = XGBClassifier(seed=2020)
tree.fit(train_results, labels)
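
# Embed the test set with the trained network; t1/t2 time only the first batch's forward pass.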
test_results = []
test_labels = []
first_batch = True
model.eval()
with torch.no_grad():
    for img in tqdm(test_loader):
        if first_batch: t1 = time.time() * 1000
        test_results.append(model(img.to(device)).cpu().numpy())
        if first_batch: t2 = time.time() * 1000
        first_batch = False

test_results = np.concatenate(test_results)
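
# Plot the first two embedding dimensions of the test set, predict its labels with the
# fitted XGBoost classifier, and save the classifier for the standalone script below.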
plt.figure(figsize=(30, 25), facecolor="azure")
plt.scatter(test_results[:, 0], test_results[:, 1])

submit = pd.read_csv(PATH + "sample_submission.csv")
t1 = time.time() * 1000
submit.Label = tree.predict(test_results)
t2 = time.time() * 1000
tree.save_model(PATH2 + "random_one_pred.json")

result = submit['Label'].tolist()
correct = 0
for i in range(len(result)):
    if test_label[i] == result[i]:   # test_label: ground-truth labels of the test split
        correct += 1
accuracy = correct * 100 / len(result)
There are 100,000 images in total, split 75/25 into training and test data. The accuracy score is around 80%.
Then I wrote another script that only loads the .pt file and the .json file and tests the model on the same images I used for training. But every time it predicts the same class label. I don't understand why my model is not working at all, even on the training dataset. Here is the code snippet for that part:
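# Standalone inference: load the TorchScript embedding model and the saved XGBoost
# classifier, then classify a single image.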
def run_all(filename):
    PATH = "/content/random/"
    torch.manual_seed(2020)
    np.random.seed(2020)
    random.seed(2020)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    if device.type == "cuda":
        torch.cuda.get_device_name()
    embedding_dims = 30
    batch_size = 32
    epochs = 60
    fileName = filename
    test_df = pd.DataFrame(
        {
            "Imagename": [fileName]
        }
    )
    test_ds = MNIST(test_df, PATH, train=False, transform=transforms.ToTensor())
    test_loader = DataLoader(test_ds, batch_size=1, shuffle=False, num_workers=4, pin_memory=True)
    # model = Network(embedding_dims)
    model = torch.jit.load(PATH + "trained_model_random_cpp.pt", map_location=device)
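
    # run the single image through the embedding network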
test=""
t1 = time.time()*1000
model.eval()
with torch.no_grad():
for img in tqdm(test_loader):
test = model(img.to(device)).cpu().numpy()
t2 = time.time()*1000
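
    # load the saved XGBoost classifier and predict the class from the embedding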
    tree = XGBClassifier(seed=2020)
    tree.load_model(PATH + "random_one_pred.json")
    t5 = time.time() * 1000
    submit = tree.predict(test)
    t6 = time.time() * 1000
    print(submit)
    print(t6 - t5)
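For completeness, the function is invoked with just an image filename; the name below is only a placeholder, assumed to live under PATH:

run_all("example.png")   # placeholder filename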