i’m trying to do a vision regression task where i’m trying to detect a point in space (the image) based on the location of my character.
Thing is my accuracy is always 0, I’m unsure what I’m doing wrong.
This is my first time trying to write my own PyTorch task, did I do a mistake, or is the model not strong enough?
Structure of my data
Basically I’m trying to predict x,y based on image, which I will use it as a point.
But whatever I tried, my model can’t converge, I don’t care about test data yet etc, just trying to learn step by step things.
Here is my code
from cgitb import grey
from unittest import skip
from torchvision import datasets, transforms
from torch.utils.data import DataLoader,Dataset
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import cv2 as cv
%matplotlib inline
device = "cuda" if torch.cuda.is_available() else "cpu"
# ## Data
class Dataset(Dataset):
def __init__(self):
self.data = np.loadtxt('data.csv', delimiter=',', dtype=str)
def __getitem__(self, index):
img = cv.imread(f"C:\\Users\\Ovi\\Desktop\\rs-dl-bot\\model\\data\\{self.data[index,3]}")
img_resized = cv.resize(img, (400,400), interpolation = cv.INTER_AREA)
gray = cv.cvtColor(img_resized, cv.COLOR_BGR2GRAY)
gray = (gray / 255)
gray = torch.tensor(gray).float()
x1 = torch.tensor([float(self.data[index,1])], dtype=torch.float)
x2 = torch.tensor([float(self.data[index,2])], dtype=torch.float)
x1 = (x1 / 1.89) / 400 # normalizing data to be between 0-1
x2 = (x2 / 1.25) / 400 # normalizing data to be between 0-1
return gray,x1,x2
def __len__(self):
return len(self.data)
dataset = Dataset()
train_loader = DataLoader(dataset, batch_size=32, shuffle=True)
class Model(nn.Module):
def __init__(self):
super(Model, self).__init__()
self.fc1 = nn.Linear(400*400, 1024)
self.relu = nn.ReLU()
self.fc2 = nn.Linear(1024, 1024)
self.fc3 = nn.Linear(1024, 2)
self.sigmoid = nn.Sigmoid()
def forward(self, x):
x = x.view(-1, 400*400)
x = self.fc1(x)
x = self.relu(x)
x = self.fc2(x)
x = self.relu(x)
x = self.fc3(x)
x = sigmoid(x)
return x
model = Model()
model.to(device)
loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
EPOCH = 200
loss_total = []
model.train()
for epoch in range(EPOCH):
correct = 0
loss_list = 0
correct = 0
for i, (img, x1,x2) in enumerate(train_loader):
img = img.to(device)
x1 = x1.to(device)
x2 = x2.to(device)
y_pred = model(img)
loss = loss_fn(y_pred, torch.stack(x1,x2))
optimizer.zero_grad()
loss.backward()
optimizer.step()
loss_list += loss.item()
correct += (y_pred == torch.stack(x1,x2)).sum().item()
if i % 32 == 0:
print(f"Epoch: {epoch}, Step: {i}, Loss: {loss / 32} , Correct: {(correct/32) * 100}%")
loss_total.append(loss / 32)
plt.plot(np.linspace(1, EPOCH, EPOCH).astype(int), loss_total)
As you see the model has no cnn, or anything special, I’m trying to play around with normal NN then go up, I’m learning from a book and trying to apply what I learn on my own data.
Thank you