Unable to use torch.cat

I am receiving an error while looping through the batch. The error: expected Tensor as element 0 in argument 0, but got builtin_function_or_method

I followed a tutorial on Udemy, but I am not sure what the issue is.

The Code:

    def sample(self, batch_size):
        assert self.can_sample(batch_size)

        batch = random.sample(self.memory, batch_size)
        batch = zip(*batch)

        return [torch.cat(items) for items in batch]

    def can_sample(self, batch_size):
        return len(self.memory) >= batch_size * 10

The error:

TypeError                                 Traceback (most recent call last)
Cell In[52], line 1
----> 1 stats = deep_sarsa(q_network, policy, 2000, epsilon = 0.01)

Cell In[51], line 24, in deep_sarsa(q_network, policy, episodes, alpha, batch_size, gamma, epsilon)
     21 memory.insert([state, action, rewards, done, next_state])
     23 if (memory.can_sample(batch_size)):
---> 24     state_b, action_b, reward_b, done_b, next__state_b = memory.sample(batch_size)
     26     qsa_b = q_network(state_b).gather(1, action_b)
     27     next_action_b = policy(next_state_b, epsilon)

Cell In[50], line 46, in ReplayMemory.sample(self, batch_size)
     38 batch = zip(*batch)
     41 #batch = torch.tensor(batch, dtype=torch.int8)
     42 #torch.cat() -> concatinate the elements in a single tensor.
---> 46 return [torch.cat(items) for items in batch]

Cell In[50], line 46, in <listcomp>(.0)
     38 batch = zip(*batch)
     41 #batch = torch.tensor(batch, dtype=torch.int8)
     42 #torch.cat() -> concatinate the elements in a single tensor.
---> 46 return [torch.cat(items) for items in batch]

TypeError: expected Tensor as element 0 in argument 0, but got builtin_function_or_method

torch.cat seems to be used improperly. I am not at a computer right now, but this should work:

import torch

# build list of tensors
x=[]
for i in range(10):
    x.append(torch.rand(4,3,5))

# test cat function
y = torch.cat([_ for _ in x])
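
(Note that torch.cat(x) would do the same thing here, since x is already a list of tensors; the list comprehension is not strictly needed.)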

I tried your method, but it does not work.
I get the following:
expected Tensor as element 0 in argument 0, but got tuple

@LastJedi the above code worked for me, but it seems you do not have a list of tensors; you have a list of tuples. Without knowing exactly what your memory contains, or how you want to use it, a specific solution is not possible.

You could try adding an index to it, e.g. torch.cat([_[0] for _ in x]). But that assumes the first index is what you need, that it is a tensor, and that you want to concatenate it on dim=0.
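
To illustrate the difference, here is a minimal sketch assuming each element is a tuple of tensors, the way replay-buffer transitions usually are; zip(*x) regroups the tuples by field so each field can be concatenated on its own:

import torch

# hypothetical buffer: each entry is a (state, action) tuple of tensors
x = [(torch.rand(1, 4), torch.tensor([[i]])) for i in range(10)]

# torch.cat(x) fails here: expected Tensor as element 0, but got tuple
states, actions = zip(*x)      # regroup by field
state_b = torch.cat(states)    # shape (10, 4)
action_b = torch.cat(actions)  # shape (10, 1)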

Here is the class for the replay memory

import random
import torch


class ReplayMemory:

    def __init__(self, capacity = 1000000):
        self.capacity = capacity
        self.memory = []
        self.position = 0

    # insert(): insert a state transition into memory
    def insert(self, transition):
        if len(self.memory) < self.capacity:
            self.memory.append(None)

        self.memory[int(self.position)] = transition
        self.position = (self.position + 1) % self.capacity

    # sample(): sample a batch of experiences from memory
    def sample(self, batch_size):
        assert self.can_sample(batch_size)

        batch = random.sample(self.memory, batch_size)

        # batch is a list of transitions, e.g. [[s, a, r, d, s'], [s, a, r, d, s']]
        # we want the following -> [[s, s], [a, a], [r, r], [d, d], [s', s']]
        # * unpacks the transitions from the list
        # zip() picks the i-th element of each transition and groups them together
        batch = zip(*batch)

        return [torch.cat(items) for items in batch]

    def can_sample(self, batch_size):
        return len(self.memory) >= batch_size * 10

    def __len__(self):
        return len(self.memory)
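
For reference, torch.cat in sample() only works if every field of every stored transition is already a tensor with a leading batch dimension of 1. A minimal sketch of that assumption, with made-up shapes:

memory = ReplayMemory(capacity = 100)

for i in range(20):
    state = torch.rand(1, 4)              # shape (1, 4)
    action = torch.tensor([[i % 2]])      # shape (1, 1)
    reward = torch.tensor([[1.0]])        # shape (1, 1)
    done = torch.tensor([[False]])        # shape (1, 1)
    next_state = torch.rand(1, 4)         # shape (1, 4)
    memory.insert([state, action, reward, done, next_state])

state_b, action_b, reward_b, done_b, next_state_b = memory.sample(2)
print(state_b.shape)  # torch.Size([2, 4])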

The deep SARSA algorithm, followed from the Udemy course:

from torch.optim import AdamW
import torch.nn.functional as F
from tqdm import tqdm


def deep_sarsa(q_network, policy, episodes, alpha = 0.001, batch_size = 32, gamma = 0.99, epsilon = 0.05):

    optim = AdamW(q_network.parameters(), lr = alpha)
    memory = ReplayMemory(capacity = int(1e6))
    stats = {'MSE Loss': [], 'Returns': []}

    for episode in tqdm(range(1, episodes + 1)):

        state = env.reset()
        ep_return = 0.
        done = False

        while not done:

            action = policy(state, epsilon)
            next_state, rewards, done, _ = env.step(action)
            memory.insert([state, action, rewards, done, next_state])

            if memory.can_sample(batch_size):
                state_b, action_b, reward_b, done_b, next_state_b = memory.sample(batch_size)

                qsa_b = q_network(state_b).gather(1, action_b)
                next_action_b = policy(next_state_b, epsilon)
                next_qsa_b = target_q_network(next_state_b).gather(1, next_action_b)

                # bootstrap only from non-terminal next states (assumes done_b is a 0/1 or bool tensor)
                target_b = reward_b + (1 - done_b.float()) * gamma * next_qsa_b

                loss = F.mse_loss(qsa_b, target_b)

                q_network.zero_grad()
                loss.backward()
                optim.step()

                stats['MSE Loss'].append(loss.item())

            state = next_state
            ep_return += rewards.item()

        stats['Returns'].append(ep_return)

        if episode % 10 == 0:
            target_q_network.load_state_dict(q_network.state_dict())

    return stats
    

I solved the issue.

There was a typo in my code: instead of using .float(), I used .float 🤦‍♂️
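
For anyone who hits the same message: passing the bound method instead of calling it reproduces the error exactly. A minimal sketch of the mistake:

import torch

s = torch.tensor([[1, 2, 3]])

good = [s.float(), s.float()]  # list of tensors
bad = [s.float, s.float]       # list of bound methods (the typo)

torch.cat(good)  # works, shape (2, 3)
torch.cat(bad)   # TypeError: expected Tensor as element 0 in argument 0,
                 # but got builtin_function_or_method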