Trying to feed a 3D matrix into a restricted boltzmann machine

Hi, I’m getting this error:

Traceback (most recent call last):

  File "<ipython-input-4-45f10113ccfa>", line 9, in <module>
    ph0,_ = rbm.sample_h(v0)

  File "<ipython-input-3-fbee6246ec62>", line 14, in sample_h
    wx = torch.mm(x, self.W.t()) #Transpose!

RuntimeError: matrices expected, got 3D, 2D tensors at C:\w\1\s\tmp_conda_3.5_051104\conda\conda-bld\pytorch_1565413984421\work\aten\src\TH/generic/THTensorMath.cpp:747

My model architecture is defined below

import numpy as np
import pandas as pd

movies = pd.read_csv('ml-1m/movies.dat', sep='::', header = None, engine = 'python', 
                     encoding = 'latin-1')
users = pd.read_csv('ml-1m/users.dat', sep='::', header = None, engine = 'python', 
                     encoding = 'latin-1')
ratings = pd.read_csv('ml-1m/ratings.dat', sep='::', header = None, engine = 'python', 
                     encoding = 'latin-1')
training_set = pd.read_csv('ml-100k/u1.base',
                           delimiter='\t',
                           header=None, # First row is not header
                           names=["user", "movie",
                                  "rating", "timestamp"]) # rename headers
test_set = pd.read_csv('ml-100k/u1.test', delimiter = '\t',
                       header = None,
                       names = ["user", "movie", "rating", "timestamp"])
def convert(data):
    ratings = pd.pivot_table(data,
                         index=["user"],
                         columns=["movie"],
                         values="rating")
    timestamps = pd.pivot_table(data,
                         index=["user"],
                         columns=["movie"],
                         values="timestamp")
    mat_ratings = ratings.values
    mat_timestamps = timestamps.values
    mat3d = np.dstack((mat_ratings, mat_timestamps))
    return mat3d
    
#Below, both training and test set get converted into 3D matrices
training_set = convert(training_set)
test_set = convert(test_set)
training_set[np.isnan(training_set)] = 0
test_set[np.isnan(test_set)] = 0

import torch
import torch.nn as nn
import torch.nn.parallel
import torch.optim as optim
import torch.utils.data
from torch.autograd import Variable
training_set = torch.FloatTensor(training_set) 
test_set = torch.FloatTensor(test_set)


#Converting the ratings into binary ratings 1 (linked) or 0 (not linked)
training_set[training_set == 0] = -1 #Converting all 0s to -1
training_set[training_set == 1] = 0
training_set[training_set == 2] = 0
training_set[training_set >= 3] = 1
test_set[test_set == 0] = -1
test_set[test_set == 1] = 0
test_set[test_set == 2] = 0
test_set[test_set >= 3] = 1

#Creating the architecture of the NN
class RBM:
    def __init__(self, nv, nh):
        self.W=torch.randn(nh, nv) 
        self.a = torch.randn(1, nh)
        self.b = torch.randn(1, nv) 
    def sample_h(self, x): 
        wx = torch.mm(x, self.W.t()) #Transpose!
        activation = wx + self.a.expand_as(wx) 
        p_h_given_v = torch.sigmoid(activation)
        return p_h_given_v, torch.bernoulli(p_h_given_v)
    def sample_v(self, y): 
        wy = torch.mm(y, self.W) 
        activation = wy + self.b.expand_as(wy)
        p_v_given_h = torch.sigmoid(activation)
        return p_v_given_h, torch.bernoulli(p_v_given_h)
    def train(self, v0, vk, ph0, phk): 
        self.W += (torch.mm(v0.t(), ph0) - torch.mm(vk.t(), phk)).t()
        self.b += torch.sum((v0 - vk), 0) #Summing with 0 to keep the format of b as tensor of 2D
        self.a += torch.sum((ph0 - phk), 0)
nv = len(training_set[0])
nh = 100 #This we can tune. Here we detect 100 features
batch_size = 100 
rbm = RBM(nv, nh) 
nb_users = 943
nb_epoch = 10
for epoch in range(1, nb_epoch + 1):
    train_loss = 0
    s = 0.0 
    for id_user in range(0, nb_users - batch_size, batch_size):
        vk = training_set[id_user:id_user+batch_size]
        v0 = training_set[id_user:id_user+batch_size]
        ph0,_ = rbm.sample_h(v0)
        for k in range(10): #for the k steps of CD
            _, hk = rbm.sample_h(vk) #Putting vk not v0 so v0 doesn't change as it is TARGET
            _, vk = rbm.sample_v(hk)
            vk[v0<0] = v0[v0<0] #Freezing the -1 values so they don't get trained
        phk, _ = rbm.sample_h(vk)
        rbm.train(v0, vk, ph0, phk)
        train_loss += torch.mean(torch.abs(v0[v0 >= 0] - vk[vk >= 0]))
        s += 1.0
    print("Epoch: "+ str(epoch) + " Loss: " + str(train_loss/s))
    
test_loss = 0
s = 0.0 
for id_user in range(nb_users):
    v = training_set[id_user:id_user+1]
    vt = test_set[id_user:id_user+1]
    if len(vt[vt>=0]) > 0: 
        _, h = rbm.sample_h(v) 
        _, v = rbm.sample_v(h)
        test_loss += torch.mean(torch.abs(vt[vt >= 0] - v[vt >= 0]))
        s += 1.0
print("Test loss: "+str (test_loss/s))

The data here has all the unique users in rows, all the unique movies in the columns and the values of this table as the ratings given by the users to those movies. The third dimension is the timestamps (as in, when the specific user rated the specific movie). I’m trying to feed this data into a Restricted Boltzmann Machine which works fine on 2D data (without the timestamps vectored in). This is somewhat urgent, any help would be really appreciated.

torch.mm performs a matrix multiplication with two matrices, while your input seems to be 2 and 3-dimensional.
Could you check the shape of self.W, please?
Also, which function throws this error?

So Pytorch won’t work for the cases with feature size more than 3?
How to approach about that?

You can use torch.matmul for inputs with more than 2 dimensions.