Updating code for a newer PyTorch version and getting RuntimeError: Tensors must have same number of dimensions: got 2 and 1

I am studying AI and I am trying to understand and update code that was written for an older version of PyTorch. I have tried it with versions 1.0 and 1.5 with similar results. Variable has been deprecated, so requires_grad=True should be passed when creating a tensor instead. I was also advised to use torch.tensor, which infers the data type automatically, rather than torch.Tensor, which is an alias of torch.FloatTensor.
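
To make sure I understood that advice, I checked the behaviour with a small snippet of my own (this is just my test, not part of the project):

import torch

# torch.tensor infers the dtype from the data it is given
a = torch.tensor([1, 2, 3])
print(a.dtype)                        # torch.int64

b = torch.tensor([1.0, 2.0, 3.0], requires_grad=True)
print(b.dtype, b.requires_grad)       # torch.float32 True

# torch.Tensor always produces float32, and a plain int is treated as a size,
# giving an uninitialized tensor of that shape rather than a tensor holding the value
c = torch.Tensor(3)
print(c.dtype, c.shape)               # torch.float32 torch.Size([3])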

But there is an error which I am not able to handle. The original line

return map(lambda x: Variable(torch.cat(x, 0)), samples) #putting the samples into a pytorch variable

fails, and removing Variable

return map(lambda x: torch.cat(x, 0), samples)

gives the same error:

RuntimeError: Tensors must have same number of dimensions: got 2 and 1
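
From what I can tell, the message itself comes from torch.cat being given tensors with different numbers of dimensions. This standalone snippet (my own check, not from the project) raises the same kind of RuntimeError:

import torch

two_d = torch.zeros(1, 5)   # shape (1, 5) -> 2 dimensions
one_d = torch.zeros(1)      # shape (1,)   -> 1 dimension

# torch.cat requires all inputs to have the same number of dimensions,
# so mixing a 2-D and a 1-D tensor fails
torch.cat((two_d, one_d), 0)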

The code is:

import numpy as np
import random
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.autograd as autograd
from torch.autograd import Variable

class Network(nn.Module): # Creating the architecture of the Neural Network

    def __init__(self, input_size, nb_action):
        super(Network, self).__init__()  #to use all tools from nn.Module
        self.input_size = input_size
        self.nb_action = nb_action
        self.fc1 = nn.Linear(input_size, 30) #creating the full connection between input & hidden layer
        self.fc2 = nn.Linear(30, nb_action)

    def forward(self, state):   #forward propagation
        x = F.relu(self.fc1(state))
        q_values = self.fc2(x)
        return q_values

class ReplayMemory(object): # Implementing Experience Replay

    def __init__(self, capacity):
        self.capacity = capacity  #maximum number of events in memory
        self.memory = []

    def push(self, event):  #append new event into the memory up to a maximum memory size
        self.memory.append(event)
        if len(self.memory) > self.capacity:
            del self.memory[0]

    def sample(self, batch_size):   #take random samples from the memory
        # if list=((1,2,3),(4,5,6)) then zip(*list)=((1,4),(2,5),(3,6))
        # events = (state,action,reward) we need (state1,state2), (action1,action2), (reward1,reward2)
        samples = zip(*random.sample(self.memory, batch_size))
        # torch.cat aligns everything as (state, action, reward)
        # OLD-> return map(lambda x: Variable(torch.cat(x, 0)), samples) #putting samples in a pytorch variable
        return map(lambda x: torch.cat(x, 0), samples) #<-NEW    #putting samples in a pytorch variable

class Dqn(): # Implementing Deep Q Learning

    def __init__(self, input_size, nb_action, gamma):
        self.gamma = gamma
        self.reward_window = []
        self.model = Network(input_size, nb_action)
        self.memory = ReplayMemory(100000)   #memory capacity
        self.optimizer = optim.Adam(self.model.parameters(), lr = 0.001)
        #OLD-> self.last_state = torch.Tensor(input_size).unsqueeze(0)
        self.last_state = torch.tensor(input_size, requires_grad=True).unsqueeze(0) #<-NEW
        self.last_action = 0
        self.last_reward = 0

    def select_action(self, state):
        with torch.no_grad():
            probs = F.softmax(self.model(state), dim=1)*100 # T=100
            action = probs.multinomial(num_samples=1)
            return action.data[0,0]

    # obsolete
    #    def select_action(self, state):
    #        probs = F.softmax(self.model(Variable(state, volatile = True))*100) # T=100
    #        action = probs.multinomial()
    #        return action.data[0,0]

    def learn(self, batch_state, batch_next_state, batch_reward, batch_action):
        outputs = self.model(batch_state).gather(1, batch_action.unsqueeze(1)).squeeze(1)
        next_outputs = self.model(batch_next_state).detach().max(1)[0]
        target = self.gamma*next_outputs + batch_reward
        td_loss = F.smooth_l1_loss(outputs, target)
        self.optimizer.zero_grad()
        td_loss.backward(retain_graph = True)
        self.optimizer.step()

    def update(self, reward, new_signal):
        #OLD-> new_state = torch.Tensor(new_signal).float().unsqueeze(0)
        new_state = torch.tensor(new_signal, requires_grad=True, dtype=torch.float).float().unsqueeze(0)   #<-NEW
        self.memory.push((self.last_state, new_state, torch.LongTensor([int(self.last_action)]), torch.Tensor([self.last_reward])))
        action = self.select_action(new_state)
        if len(self.memory.memory) > 100:
            batch_state, batch_next_state, batch_action, batch_reward = self.memory.sample(100)
            self.learn(batch_state, batch_next_state, batch_reward, batch_action)
        self.last_action = action
        self.last_state = new_state
        self.last_reward = reward
        self.reward_window.append(reward)
        if len(self.reward_window) > 1000:
            del self.reward_window[0]
        return action

    def score(self):
        return sum(self.reward_window)/(len(self.reward_window)+1.)

    def save(self):
        torch.save({'state_dict': self.model.state_dict(),
                    'optimizer' : self.optimizer.state_dict(),
                   }, 'last_brain.pth')

    def load(self):
        if os.path.isfile('last_brain.pth'):
            print("=> loading checkpoint... ")
            checkpoint = torch.load('last_brain.pth')
            self.model.load_state_dict(checkpoint['state_dict'])
            self.optimizer.load_state_dict(checkpoint['optimizer'])
            print("done !")
        else:
            print("no checkpoint found...")

Hi,

I am not sure what the goal of that line is. The error happens because the elements in x do not have the same number of dimensions: some are 1D and others 2D, which cat cannot handle.

This code was written to run with PyTorch 0.3.1. I am studying this code too.
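
If it helps, my guess (I have not run the full project) is that the dimension mismatch comes from the changed last_state line: torch.Tensor(input_size) builds a 1-D tensor of length input_size, while torch.tensor(input_size) builds a 0-D tensor holding the value input_size, so after unsqueeze(0) the stored states end up 1-D instead of 2-D. A minimal sketch of what I mean, with input_size = 5 as an assumed example value:

import torch

input_size = 5  # assumed example value, not taken from the original project

old_state = torch.Tensor(input_size).unsqueeze(0)   # shape (1, 5): 2 dimensions
new_state = torch.tensor(input_size).unsqueeze(0)   # shape (1,):   1 dimension
print(old_state.shape, new_state.shape)

# One possible way to keep the old 2-D shape without Variable:
last_state = torch.zeros(1, input_size)             # shape (1, 5), float32 by default
print(last_state.shape)

With something like torch.zeros(1, input_size) the stored states stay 2-D, so torch.cat in sample should receive tensors with matching numbers of dimensions, but I have only checked the shapes, not the whole training loop.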