I am studying AI and I am trying to understand and update code that was written for an older version of PyTorch. I have tried it on versions 1.0 and 1.5 with similar results. Variable has been deprecated; instead, requires_grad=True should be passed when creating the tensor. I was also advised that instead of torch.Tensor, which is an alias for torch.FloatTensor, I should use torch.tensor, which automatically infers the data type.
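For example, this is how I understand the change (the values here are only for illustration, not from my actual code):

# old style (before Variable was deprecated)
x = Variable(torch.Tensor([1.0, 2.0]), requires_grad=True)

# new style
x = torch.tensor([1.0, 2.0], requires_grad=True)  # dtype inferred as torch.float32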
But there is an error I am not able to resolve. The original line

return map(lambda x: Variable(torch.cat(x, 0)), samples) #putting the samples into a pytorch Variable

as well as the version with Variable removed,

return map(lambda x: torch.cat(x, 0), samples)

both fail with the same error:

RuntimeError: Tensors must have same number of dimensions: got 2 and 1
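For reference, I can reproduce what looks like the same error with a small standalone example (the shapes below are placeholders I picked, not my real data):

import torch
a = torch.zeros(1, 5)   # 2-D tensor, shape [1, 5]
b = torch.zeros(5)      # 1-D tensor, shape [5]
torch.cat((a, b), 0)    # RuntimeError: Tensors must have same number of dimensions: got 2 and 1

So it seems torch.cat only accepts tensors that all have the same number of dimensions.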
The code is:
import numpy as np
import random
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.autograd as autograd
from torch.autograd import Variable

class Network(nn.Module): # Creating the architecture of the Neural Network
    def __init__(self, input_size, nb_action):
        super(Network, self).__init__() # to use all tools from nn.Module
        self.input_size = input_size
        self.nb_action = nb_action
        self.fc1 = nn.Linear(input_size, 30) # creating the full connection between input & hidden layer
        self.fc2 = nn.Linear(30, nb_action)

    def forward(self, state): # forward propagation
        x = F.relu(self.fc1(state))
        q_values = self.fc2(x)
        return q_values

class ReplayMemory(object): # Implementing Experience Replay
    def __init__(self, capacity):
        self.capacity = capacity # maximum number of events in memory
        self.memory = []

    def push(self, event): # append a new event into the memory, up to the maximum memory size
        self.memory.append(event)
        if len(self.memory) > self.capacity:
            del self.memory[0]

    def sample(self, batch_size): # take random samples from the memory
        # if list = ((1,2,3),(4,5,6)) then zip(*list) = ((1,4),(2,5),(3,6))
        # events = (state, action, reward); we need (state1, state2), (action1, action2), (reward1, reward2)
        samples = zip(*random.sample(self.memory, batch_size))
        # torch.cat aligns everything as (state, action, reward)
        # OLD-> return map(lambda x: Variable(torch.cat(x, 0)), samples) # putting samples into a pytorch Variable
        return map(lambda x: torch.cat(x, 0), samples) #<-NEW # putting samples into a pytorch tensor

class Dqn(): # Implementing Deep Q Learning
    def __init__(self, input_size, nb_action, gamma):
        self.gamma = gamma
        self.reward_window = []
        self.model = Network(input_size, nb_action)
        self.memory = ReplayMemory(100000) # memory capacity
        self.optimizer = optim.Adam(self.model.parameters(), lr = 0.001)
        #OLD-> self.last_state = torch.Tensor(input_size).unsqueeze(0)
        self.last_state = torch.tensor(input_size, requires_grad=True).unsqueeze(0) #<-NEW
        self.last_action = 0
        self.last_reward = 0

    def select_action(self, state):
        with torch.no_grad():
            probs = F.softmax(self.model(state), dim=1)*100 # T=100
        action = probs.multinomial(num_samples=1)
        return action.data[0,0]
    # obsolete
    # def select_action(self, state):
    #     probs = F.softmax(self.model(Variable(state, volatile = True))*100) # T=100
    #     action = probs.multinomial()
    #     return action.data[0,0]

    def learn(self, batch_state, batch_next_state, batch_reward, batch_action):
        outputs = self.model(batch_state).gather(1, batch_action.unsqueeze(1)).squeeze(1)
        next_outputs = self.model(batch_next_state).detach().max(1)[0]
        target = self.gamma*next_outputs + batch_reward
        td_loss = F.smooth_l1_loss(outputs, target)
        self.optimizer.zero_grad()
        td_loss.backward(retain_graph = True)
        self.optimizer.step()

    def update(self, reward, new_signal):
        #OLD-> new_state = torch.Tensor(new_signal).float().unsqueeze(0)
        new_state = torch.tensor(new_signal, requires_grad=True, dtype=torch.float).float().unsqueeze(0) #<-NEW
        self.memory.push((self.last_state, new_state, torch.LongTensor([int(self.last_action)]), torch.Tensor([self.last_reward])))
        action = self.select_action(new_state)
        if len(self.memory.memory) > 100:
            batch_state, batch_next_state, batch_action, batch_reward = self.memory.sample(100)
            self.learn(batch_state, batch_next_state, batch_reward, batch_action)
        self.last_action = action
        self.last_state = new_state
        self.last_reward = reward
        self.reward_window.append(reward)
        if len(self.reward_window) > 1000:
            del self.reward_window[0]
        return action

    def score(self):
        return sum(self.reward_window)/(len(self.reward_window)+1.)

    def save(self):
        torch.save({'state_dict': self.model.state_dict(),
                    'optimizer' : self.optimizer.state_dict(),
                   }, 'last_brain.pth')

    def load(self):
        if os.path.isfile('last_brain.pth'):
            print("=> loading checkpoint... ")
            checkpoint = torch.load('last_brain.pth')
            self.model.load_state_dict(checkpoint['state_dict'])
            self.optimizer.load_state_dict(checkpoint['optimizer'])
            print("done !")
        else:
            print("no checkpoint found...")