Hello, I’m trying to convert the videos from KTH dataset (Recognition of human actions) as it is said here (KTH-Dataset/sequences_list.txt at master · tejaskhot/KTH-Dataset · GitHub). I just got the data in the way the authors did, by making it by hand. At this moment, my code is the one below it is working, but not when it gets to the training. Can anyone help me? Thanks in advance.
# Import modules
import torch
from numpy import random
import matplotlib.pyplot as plt
import torchvision
from torchvision import transforms
from torchvision import datasets
from torch import optim
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import os
from PIL import Image
from matplotlib import pyplot as plt
import numpy as np
import requests
import torch
from torchvision import models
from torchvision import transforms
import torch.nn as nn
from tqdm import tqdm
import shutil
from urllib.request import urlretrieve
from torch.utils.data import Dataset, DataLoader
#Class labels
classes = {'boxing':0, 'handclapping':1, 'handwaving':2, 'jogging':3, 'running':4, 'walking':5}
#Dataset
from google.colab import drive
drive.mount('/content/drive/', force_remount=True)
import glob
folders_train = glob.glob('/content/drive/MyDrive/HumanActions_new_data/TRAIN')
count = 0
for i in range(0,len(folders_train)):
video_data = folders_train[i]
video = cv2.VideoCapture(video_data)
success = True
while success:
success,image = video.read()
name = '/content/drive/MyDrive/HumanActions_new_data/TRAIN/Frames'+str(count)+'.jpg'
if success == True:
cv2.imwrite(name,image)
print('Frame {} Extracted Successfully'.format(count))
count+=1
else:
i = i+1
i = i+1
print('\n\n\nVideo {} Extracted Successfully\n\n\n'.format(video_data))
train_dataloader = DataLoader('/content/drive/MyDrive/HumanActions_new_data/TRAIN',
batch_size=16, shuffle=True)
And then I did the exactly same thing for validation and test datasets, and it works. But then I created a RNN and it gives an error
#Set device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
#Hyperparameters
input_size = 120
sequence_length = 28
num_layers = 2
hidden_size = 256
num_classes = 6
learning_rate = 0.001
batch_size = 64
num_epochs = 2
import torch.nn as nn
class RNNModel(nn.Module):
def __init__(self, input_size, hidden_size, num_layers, num_classes):
super(RNNModel, self).__init__()
self.hidden_size = hidden_size
self.num_layers = num_layers
self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
self.fc = nn.Linear(hidden_size*sequence_length, num_classes)
def forward(self, x):
# Initialize hidden state with zeros
h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
# One time step
out, _ = self.rnn(x, h0)
out = out.reshape(out.shape[0], -1)
return out
model = RNNModel(input_size, hidden_size, num_layers, num_classes).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
for epoch in range(num_epochs):
for data, targets in enumerate(train_dataloader, 1):
# Get data to cuda if possible
data = data.to(device)
targets = targets.to(device=device)
# Get to correct shape
data = data.reshape(data.shape[0], -1)
# Forward
scores = model(data)
loss = criterion(scores, targets)
# Backward
optimizer.zero_grad()
loss.backward()
# Gradient descent/ Adam step
optimizer.step()
AttributeError Traceback (most recent call last)
in ()
2 for data, targets in enumerate(train_dataloader, 1):
3 # Get data to cuda if possible
----> 4 data = data.to(device)
5 targets = targets.to(device=device)
6
AttributeError: ‘int’ object has no attribute ‘to’
I thought it could be because train_dataset is not a tensor, but I don’t know how to convert it into a tensor, I’ve tried to put frames in a list and then convert it into a tensor, but it takes too long. Thanks in advance.