I have an video in form of numpy array (Picture Number,x,y). And my mouse events corresponding to the pictures (Number,x,y,click). Now i tried the Approach in my code which oblivious doesn’t work. I couldn’t find anything specific in the internet either. Can someone help me how i import the dataset properly?
My code so far:
import torch
import torchvision
import torchvision.transforms as transforms
from PIL import Image
from CustomDataset import CustomMouseDataset,Rescale
def load_data():
# transform = transforms.Compose([transforms.ToTensor(),transforms.Normalize((0.5,0.5,0.5),(0.5,0.5,0.5))])
#
# #Load Recoreded Data
# with h5py.File('data/video_data_22_7_2018_17_46','r') as data:
# video = data['video'][()]
# mouse = data['mouse'][()]
# video = video[:50]
# mouse = mouse[:50]
transform = transforms.Compose([transforms.ToTensor()])
train_data = CustomMouseDataset('data/video_data_22_7_2018_17_46',transform)
train_loader = torch.utils.data.DataLoader(train_data,batch_size=10,shuffle=True)
return train_loader
import torch.nn as nn
import torch.nn.functional as F
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.conv1 = nn.Conv2d(1,10, 560, 656)
self.pool = nn.MaxPool2d(2, 2)
self.conv2 = nn.Conv2d(656, 16, 5)
self.fc1 = nn.Linear(16 * 5 * 5, 120)
self.fc2 = nn.Linear(120, 84)
self.fc3 = nn.Linear(84, 10)
def forward(self, x):
x = self.pool(F.relu(self.conv1(x)))
x = self.pool(F.relu(self.conv2(x)))
x = x.view(-1, 16 * 5 * 5)
x = F.relu(self.fc1(x))
x = F.relu(self.fc2(x))
x = self.fc3(x)
return x
net = Net()
train_data = load_data()
import torch.optim as optim
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr = 0.001, momentum = 0.9)
#Train the Network
for epoch in range(2):
running_loss = 0.0
for i,data in enumerate(train_data,0):
inputs = data['frame']
labels = data['mouse']
print(type(inputs))
print(inputs)
print(inputs.shape)
#Zero gradients Parameter
optimizer.zero_grad()
#forward + backward +optimize
outputs = net(inputs)
loss = criterion(outputs,labels)
loss.backward()
optimizer.step()
if i % 4 == 1: # print every 2000 mini-batches
print('[%d, %5d] loss: %.3f' %
(epoch + 1, i + 1, running_loss / 5))
running_loss = 0.0
print('Finished Training')
And the Dataloader
import h5py
import torchvision
from PIL import Image
def load_data(h5py_file):
with h5py.File(h5py_file,'r') as data:
video = data['video'][()]
mouse = data['mouse'][()]
video = video[:50]
mouse = mouse[:50]
return video,mouse
class CustomMouseDataset():
"""Dataloader for Custom dataset"""
def __init__(self, h5py_file,transform = None):
self.video,self.mouse = load_data(h5py_file)
self.transform = transform
def __len__(self):
return len(self.mouse)
def __getitem__(self,idx):
frame = self.video[idx]
frame = Image.fromarray(frame)
frame = self.transform(frame)
mouse = self.mouse[idx]
sample= {'frame': frame, 'mouse': mouse}
# if self.transform:
# sample = self.transform(sample)
return sample
class Rescale(object):
def __init__(self,output_size):
assert isinstance(output_size,(int,tuple))
self.output_size = output_size
def __call__(self,sample):
frame, mouse = sample['frame'],sample['mouse']
h,w = frame.shape[:2]
if isinstance(self.output_size, int):
if h > w:
new_h, new_w = self.output_size * h / w, self.output_size
else:
new_h, new_w = self.output_size, self.output_size * w / h
else:
new_h, new_w = self.output_size
new_h, new_w = int(new_h), int(new_w)
fra = transform.resize(frame, (new_h, new_w))
return {'image':fra,'mouse':mouse}
Greeting Losspost