RuntimeError: size mismatch, m1: [1 x 3951], m2: [1682 x 20]

Saswata_Bhattacharya · February 19, 2020, 12:48pm

import os.path
import csv
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.optim as optim
import torch.utils.data
from torch.autograd import Variable
from pathlib import Path

Importing the dataset

movies = pd.read_csv(‘ml-1m/movies.dat’, sep = ‘::’, header = None, engine = ‘python’, encoding = ‘latin-1’)
users = pd.read_csv(‘ml-1m/users.dat’, sep = ‘::’, header = None, engine = ‘python’, encoding = ‘latin-1’)
ratings = pd.read_csv(‘ml-1m/ratings.dat’, sep = ‘::’, header = None, engine = ‘python’, encoding = ‘latin-1’)

Preparing the training set and the test set

training_set = pd.read_csv(‘ml-100k/u2.base’, delimiter = ‘\t’)
training_set = np.array(training_set, dtype = ‘int’)
test_set = pd.read_csv(‘ml-100k/u2.test’, delimiter = ‘\t’)
test_set = np.array(test_set, dtype = ‘int’)

Getting the number of users and movies

nb_users = int(max(max(training_set[:,0]), max(test_set[:,0])))
nb_movies = int(max(max(training_set[:,1]), max(test_set[:,1])))

Converting the data into an array with users in lines and movies in columns

def convert(data):
new_data = []
for id_users in range(1, nb_users + 1):
id_movies = data[:,1][data[:,0] == id_users]
id_ratings = data[:,2][data[:,0] == id_users]
ratings = np.zeros(nb_movies)
ratings[id_movies - 1] = id_ratings
new_data.append(list(ratings))
return new_data
training_set = convert(training_set)
test_set = convert(test_set)

Converting the data into Torch tensors

training_set = torch.FloatTensor(training_set)
test_set = torch.FloatTensor(test_set)

class SAE(nn.Module):
def init(self, ):
super(SAE, self).init()
self.fc1 = nn.Linear(nb_movies, 20)
self.fc2 = nn.Linear(20, 10)
self.fc3 = nn.Linear(10, 20)
self.fc4 = nn.Linear(20, nb_movies)
self.activation = nn.Sigmoid()
def forward(self, x):
x = self.activation(self.fc1(x))
x = self.activation(self.fc2(x))
x = self.activation(self.fc3(x))
x = self.fc4(x)
x.view(x.size(0), -1)
return x
sae = SAE()
criterion = nn.MSELoss()
optimizer = optim.RMSprop(sae.parameters(), lr = 0.01, weight_decay = 0.5)

from pathlib import Path, PureWindowsPath
filename = Path(“D:/Movie-Recommendation-System-using-AutoEncoders-master/Movie-Recommendation-System-using-AutoEncoders-master”)

Convert path to Windows format

path_on_windows = PureWindowsPath(filename)
print(path_on_windows)

def load_checkpoint(path_on_windows):
checkpoint = torch.load(path_on_windows)
model = checkpoint[‘model’]
model.load_state_dict(checkpoint[‘state_dict’])
for parameter in model.parameters():
parameter.requires_grad = False
model.eval()
return model

model = load_checkpoint(‘checkpoint.pth’)
#print(model)

movie_dict = {3709 : ‘Sleepwalkers’,
2846 : ‘Adventures of Milo and Otis’,
3880 : ‘Ballad of Ramblin Jack’,
2971 : ‘All That Jazz’,
3951 : ‘Two Family House’,
3681 : ‘For a Few Dollars More’,
3921 : ‘Beach Party’,
3541 : ‘Third World Cop’,
2189 : ‘I Married A Strange Person’,
3687 : ‘Light Years’,
3390 : ‘Shanghai Surprise’,
2940 : ‘Gilda’,
3857 : ‘Bless the Child’,
1464 : ‘Lost Highway’,
3376 : ‘Fantastic Night-The (La Nuit Fantastique)’,
3670 : ‘Story of G.I. Joe’,
3906 : ‘Under Suspicion’,
792 : ‘Hungarian Fairy Tale’}
count = nb_users
sid = count+1
sid2 = sid-1
with open(‘ml-100k/u2.base’, mode=‘a’) as file:
writer = csv.writer(file, delimiter=’\t’, lineterminator=’\n’)
for iD in movie_dict:
#print(movie_dict[iD])
#a = int(input(‘Enter rating:’))
writer.writerow([sid,iD,4,‘888692464’])
file.close()
#nb_users = int(max(max(training_set[:,0]), max(test_set[:,0])))
training_set = pd.read_csv(‘ml-100k/u2.base’, delimiter = ‘\t’)
training_set = np.array(training_set, dtype = ‘int’)
training_set = convert(training_set)
training_set = torch.FloatTensor(training_set)
sid = 954
input = Variable(training_set[sid]).unsqueeze(0)
output = model(input)
lol = output.detach().numpy()
loll = lol[0]
for i in loll:
if(i>4):
print(100,’:’,round(i))

The above is my code for recommender system using autoencoder. While running the code, I am getting the following error:

RuntimeError: size mismatch, m1: [1 x 3951], m2: [1682 x 20] at C:\w\1\s\windows\pytorch\aten\src\TH/generic/THTensorMath.cpp:136

ptrblck · February 20, 2020, 12:50am

Based on the error message I assume the first layer might throw this error.
Could you check the shape of your input tensors and make sure the in_features are set to the number of features for each sample?

PS: You can add code snippets by wrapping them into three backticks ```