Feedforward neural network for MediaPipe hand landmark data

I am working on a project to classify certain hand positions: I record hand landmark data via MediaPipe and then use that data to train a model. I am having trouble getting the configuration right compared to another model I built that used image data.

The plan was to store the xyz coordinates of all 21 hand landmark locations as a list of lists, one hand per line, in text files named after their respective class. The main method reads these files, builds one parent list that pairs each whole-hand set of landmarks with its class, and feeds that into a dataset. This does not seem to be working: the model fails in the train_model method on the line scores = model(data.unsqueeze(0)) with the error "Given input size: (192x5x1). Calculated output size: (192x2x0). Output size is too small". I have seen a bunch of other posts with closely related issues, but I'm too inexperienced to adapt a general solution to my specific project.
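For reference, the recording side (not shown here) writes one hand per line, roughly like this. This is a simplified sketch: record_hand is a placeholder name, the file name is just an example, and results is the return value of MediaPipe's Hands.process().

# Sketch of the recording side: one hand per line, each landmark serialized
# as str([x, y, z]) so the 21 triples sit back to back on the line.
def record_hand(results, path='Hand Models/0.txt'):
    with open(path, 'a') as f:
        for hand in results.multi_hand_landmarks:
            line = ''.join(str([lm.x, lm.y, lm.z]) for lm in hand.landmark)
            f.write(line + '\n')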

Custom dataset definition

class LandmarkDataset(Dataset):

    def __init__(self, data, transform=None):
        self.annotations = data
        self.transform = transform

    def __len__(self):
        return len(self.annotations)

    def __getitem__(self, index):
        landmarks = torch.tensor(self.annotations[index][0])
        y_label = torch.tensor(int(self.annotations[index][1]))
        return landmarks, y_label
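To make the expected shapes concrete, each item should come out as a (21, 3) float tensor plus an integer label. Note this assumes the .txt files are named with numeric class labels (e.g. 0.txt through 9.txt), since __getitem__ calls int() on the name. A quick sanity check (hypothetical usage, relying on build_array from further down):

# One sample from the dataset: 21 landmarks x 3 coordinates, plus a class index
dataset = LandmarkDataset(data=build_array(trainingPATH))
landmarks, label = dataset[0]
print(landmarks.shape)  # expected: torch.Size([21, 3])
print(label)            # expected: a scalar tensor such as tensor(0)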

And my main Python file is defined as follows:

File paths

modelPATH = 'model.pt'
trainingPATH = 'Hand Models/'

Neural network settings

cudnn.benchmark = True
num_classes = 10
in_channel = 3
learning_rate = 1e-3
batch_size = 3
num_epochs = 1

def check_accuracy(loader, model):
    num_correct = 0
    num_samples = 0
    model.eval()

    with torch.no_grad():
        for x, y in loader:
            x = x.to(device=device)
            y = y.to(device=device)

            scores = model(x)
            _, predictions = scores.max(1)
            num_correct += (predictions == y).sum()
            num_samples += predictions.size(0)

        print(f'Got {num_correct} / {num_samples} with accuracy {float(num_correct)/float(num_samples)*100:.2f}')

    model.train()

def train_model(device):

    model = torchvision.models.googlenet(pretrained=True)
    model.to(device)

    # settings for training
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # model training
    for epoch in range(num_epochs):
        losses = []
        for batch_idx, (data, targets) in enumerate(train_loader):
            # get data to cuda
            #print(targets)
            data = data.to(device=device)
            targets = targets.to(device=device)

            # forward
            print(data)
            scores = model(data.unsqueeze(0))
            loss = criterion(scores, targets)
            losses.append(loss.item())

            # backward
            optimizer.zero_grad()
            loss.backward()

            # gradient descent
            optimizer.step()

        print(f'Cost at epoch {epoch} is {sum(losses)/len(losses)}')
    return model
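From the related posts, my understanding is that GoogLeNet's convolution/pooling stack expects image-sized input (roughly 3x224x224), while my batches are (batch, 21, 3), so the spatial dimensions shrink to zero partway through the network. If that is the problem, would a plain feedforward net sized for the landmarks be the right replacement? A rough sketch (LandmarkNet is a placeholder name and the hidden width of 128 is an arbitrary guess):

import torch.nn as nn

# Hypothetical landmark-sized feedforward classifier: flattens the
# (batch, 21, 3) landmark tensor into 63 inputs per sample.
class LandmarkNet(nn.Module):
    def __init__(self, num_classes=10):
        super().__init__()
        self.net = nn.Sequential(
            nn.Flatten(),              # (batch, 21, 3) -> (batch, 63)
            nn.Linear(21 * 3, 128),
            nn.ReLU(),
            nn.Linear(128, num_classes),
        )

    def forward(self, x):
        return self.net(x)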

def build_array(path):
    ret = []
    for file in os.listdir(path):
        if file.endswith(".txt"):
            with open(path + file, 'r') as file:
                text = file.read()
                for line in text.splitlines():
                    landmark = []
                    # wrap the line in brackets and insert commas so it parses as JSON
                    line = ('[' + line + ']').replace(" ", "")
                    line = line.replace("][", "], [")
                    line = json.loads(line)
                    landmark.append(line)
                    # class label comes from the file name, e.g. 'Hand Models/0.txt' -> '0'
                    landmark.append(file.name.split("/")[1].split(".")[0])
                    ret.append(landmark)
    return ret
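To make the string munging concrete, this is what a single (made-up, shortened) line goes through:

line = '[0.1, 0.2, 0.3][0.4, 0.5, 0.6]'     # raw line from the file, 2 of the 21 triples
line = ('[' + line + ']').replace(" ", "")  # '[[0.1,0.2,0.3][0.4,0.5,0.6]]'
line = line.replace("][", "], [")           # '[[0.1,0.2,0.3], [0.4,0.5,0.6]]'
print(json.loads(line))                     # [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]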

if __name__ == "__main__":

    # Define CUDA device
    device = torch.device('cuda')
    print('CUDA capable device found and set to ' + torch.cuda.get_device_name(torch.cuda.current_device()))

    if not os.path.isfile(modelPATH):
        # iterate over txt files and build a list of landmarks and their respective classes
        landmark_classes = build_array(trainingPATH)

        dataset = LandmarkDataset(data=landmark_classes, transform=transforms.ToTensor())

        train_size = int(0.8 * len(dataset))
        test_size = len(dataset) - train_size
        train_set, test_set = torch.utils.data.random_split(dataset, [train_size, test_size])
        test_loader = DataLoader(dataset=test_set, batch_size=batch_size, shuffle=True)
        train_loader = DataLoader(dataset=train_set, batch_size=batch_size, shuffle=True)

        model = train_model(device)
        model.to('cuda')

        print('Testing accuracy on newly trained model')
        check_accuracy(test_loader, model)
        # save new model
        torch.save(model, modelPATH)
        model.eval()
    else:
        # load existing model
        model = torch.load(modelPATH)
        model.to(device='cuda')
        model.eval()