RuntimeError - size mismatch when using qnetwork with eligibility trace

Hello there,

I have tried to use some existing code and I get the error stated in the title, which can also be seen below. I get my state from my environment, which is Simulink, because we are doing a pump project for the biggest circulator pump manufacturer in the world.

output:

action output <class 'numpy.ndarray'>
hej
eli <class 'torch.autograd.variable.Variable'> value Variable containing:
 18.9000
 19.0000
[torch.FloatTensor of size 2]

Traceback (most recent call last):
  File "ai_eligibility.py", line 139, in <module>
    inputs, targets = eligibility_trace(batch)
  File "ai_eligibility.py", line 104, in eligibility_trace
    output = qnetwork(input)
  File "C:\Users\koch\Anaconda3\envs\py36_pytorch_kivy\lib\site-packages\torch\nn\modules\module.py", line 325, in __call__
    result = self.forward(*input, **kwargs)
  File "ai_eligibility.py", line 47, in forward
    x = F.relu(self.fc1(state))
  File "C:\Users\koch\Anaconda3\envs\py36_pytorch_kivy\lib\site-packages\torch\nn\modules\module.py", line 325, in __call__
    result = self.forward(*input, **kwargs)
  File "C:\Users\koch\Anaconda3\envs\py36_pytorch_kivy\lib\site-packages\torch\nn\modules\linear.py", line 55, in forward
    return F.linear(input, self.weight, self.bias)
  File "C:\Users\koch\Anaconda3\envs\py36_pytorch_kivy\lib\site-packages\torch\nn\functional.py", line 837, in linear
    output = input.matmul(weight.t())
  File "C:\Users\koch\Anaconda3\envs\py36_pytorch_kivy\lib\site-packages\torch\autograd\variable.py", line 386, in matmul
    return torch.matmul(self, other)
  File "C:\Users\koch\Anaconda3\envs\py36_pytorch_kivy\lib\site-packages\torch\functional.py", line 168, in matmul
    return torch.mm(tensor1.unsqueeze(0), tensor2).squeeze_(0)
RuntimeError: size mismatch, m1: [1 x 2], m2: [1 x 30] at c:\anaconda2\conda-bld\pytorch_1513133520683\work\torch\lib\th\generic/THTensorMath.c:1416

This is my AI code:

# AI for pump



# Importing the libraries
import numpy as np
import random
import torch
import math
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable


# Importing the other Python files
import experience_replay_4_eligibility
import env


# Part 1 - Building the AI

# Making the brain

# Creating the architecture of the Neural Network
class Qnetwork(nn.Module): # inheriting from nn.Module
    
    def __init__(self, input_size, nb_action):
        super(Qnetwork, self).__init__()
        # Input and output neurons
        self.input_size = input_size
        self.nb_action = nb_action
        self.fc1 = nn.Linear(input_size, 30)
        self.fc2 = nn.Linear(30, nb_action) # 30 neurons in hidden layer
    
    # Forward function that activates the neurons and performs forward propagation
    def forward(self, state):
        x = F.relu(self.fc1(state))
        q_values = self.fc2(x)
        return q_values

# Making the body

class SoftmaxBody(nn.Module):
    
    def __init__(self, T):
        super(SoftmaxBody, self).__init__()
        self.T = T

    def forward(self, outputs):
        probs = F.softmax(outputs * self.T)   
        actions = probs.multinomial()
        return actions

# Making the AI

class AI:

    def __init__(self, brain, body):
        self.brain = brain
        self.body = body

    def __call__(self, inputs):
        input = Variable(torch.from_numpy(np.array(inputs, dtype = np.float32)))
        output = self.brain(input)
        actions = self.body(output)
        return actions.data.numpy()



# Part 2 - Training the AI with Deep Convolutional Q-Learning

#Getting the simulink environment
# Creating Connection for sender and receiver socket
env_simu = env.environment()
env_simu.createServerSockets()
# Building an AI
qnetwork = Qnetwork(1,20)
softmax_body = SoftmaxBody(T = 1.0)
ai = AI(brain = qnetwork, body = softmax_body)
env_simu.sendAction(0)

# Setting up Experience Replay
n_steps = experience_replay_4_eligibility.NStepProgress(env = env_simu, ai = ai, n_step = 10)
memory = experience_replay_4_eligibility.ReplayMemory(n_steps = n_steps, capacity = 10000)

# Implementing Eligibility Trace
def eligibility_trace(batch):
    gamma = 0.99
    inputs = []
    targets = []
    for series in batch:
        input = Variable(torch.from_numpy(np.array([series[0].state, series[-1].state], dtype = np.float32)))
        print('eli', type(input),'value',input)
        output = qnetwork(input)
        cumul_reward = 0.0 if series[-1].done else output[1].data.max()
        for step in reversed(series[:-1]):
            cumul_reward = step.reward + gamma * cumul_reward
        state = series[0].state
        target = output[0].data
        target[series[0].action] = cumul_reward
        inputs.append(state)
        targets.append(target)
    return torch.from_numpy(np.array(inputs, dtype = np.float32)), torch.stack(targets)

# Making the moving average on 100 steps
class MA:
    def __init__(self, size):
        self.list_of_rewards = []
        self.size = size
    def add(self, rewards):
        if isinstance(rewards, list):
            self.list_of_rewards += rewards
        else:
            self.list_of_rewards.append(rewards)
        while len(self.list_of_rewards) > self.size:
            del self.list_of_rewards[0]
    def average(self):
        return np.mean(self.list_of_rewards)
ma = MA(100)

# Training the AI
loss = nn.MSELoss()
optimizer = optim.Adam(qnetwork.parameters(), lr = 0.001)
nb_epochs = 100
for epoch in range(1, nb_epochs + 1):
    memory.run_steps(200)
    for batch in memory.sample_batch(128):
        print('hej')
        inputs, targets = eligibility_trace(batch)
        inputs, targets = Variable(inputs), Variable(targets)
        predictions = qnetwork(inputs)
        loss_error = loss(predictions, targets)
        optimizer.zero_grad()
        loss_error.backward()
        optimizer.step()
    rewards_steps = n_steps.rewards_steps()
    ma.add(rewards_steps)
    avg_reward = ma.average()
    print("Epoch: %s, Average Reward: %s" % (str(epoch), str(avg_reward)))
    if avg_reward >= 1500:
        print("Congratulations, your AI wins")
        break

And this is my experience replay code:

# Experience Replay

# Importing the libraries
import numpy as np
from collections import namedtuple, deque
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable

# Defining one Step
Step = namedtuple('Step', ['state', 'action', 'reward'])

# Making the AI progress on several (n_step) steps
last_distance = 0
class NStepProgress:
    
    def __init__(self, env, ai, n_step):
        self.ai = ai
        self.rewards = []
        self.env = env
        self.n_step = n_step
    
    def __iter__(self):
        global last_distance
        goalT1 = 20
        state = self.env.receiveState()
        state = state[0]
        print('staaaate',state)
        state = [state]
        state = torch.Tensor(state).float().unsqueeze(0)
        history = deque()
        reward = 0.0
        print('reward',type(reward))
        print('state',type(state))
        while True: # REMEMBER: convert the state to a tensor or something
            print('state', type(np.array(state)),'value',np.array(state))
            action = self.ai(np.array(state))
            print('action output', type(action))
            action = int(action[0][0])
            self.env.sendAction(action+1)
            next_state = self.env.receiveState()
            next_state = next_state[0]
            distance = ((abs(goalT1 - next_state)))
            #next_state, r, is_done, _ = self.env.step(action)
            
            # Reward Policy
            if 0 <= distance <= 0.2:
                r = 1
                if distance < last_distance:
                    r = r*0.5
            elif distance < last_distance:
                r = 0.1
            else:
                r = -0.5*(distance)


            reward += r
            history.append(Step(state = state, action = action, reward = r))
            while len(history) > self.n_step + 1:
                history.popleft()
            if len(history) == self.n_step + 1:
                yield tuple(history)
            next_state = [next_state]
            next_state = torch.Tensor(next_state).float().unsqueeze(0)
            state = next_state
            self.rewards.append(reward)
            if r == 1:
                if len(history) > self.n_step + 1:
                    history.popleft()
                while len(history) >= 1:
                    yield tuple(history)
                    history.popleft()
                self.rewards.append(reward)
                reward = 0.0
                state = self.env.receiveState()
                state = state[0]
                state = [state]
                state = torch.Tensor(state).float().unsqueeze(0)
                history.clear()
    
    def rewards_steps(self):
        rewards_steps = self.rewards
        self.rewards = []
        return rewards_steps

# Implementing Experience Replay

class ReplayMemory:
    
    def __init__(self, n_steps, capacity = 10000):
        self.capacity = capacity
        self.n_steps = n_steps
        self.n_steps_iter = iter(n_steps)
        self.buffer = deque()

    def sample_batch(self, batch_size): # creates an iterator that returns random batches
        ofs = 0
        vals = list(self.buffer)
        np.random.shuffle(vals)
        while (ofs+1)*batch_size <= len(self.buffer):
            yield vals[ofs*batch_size:(ofs+1)*batch_size]
            ofs += 1

    def run_steps(self, samples):
        while samples > 0:
            entry = next(self.n_steps_iter) # 10 consecutive steps
            self.buffer.append(entry) # we put 200 for the current episode
            samples -= 1
        while len(self.buffer) > self.capacity: # we accumulate no more than the capacity (10000)
            self.buffer.popleft()

I know the problem is in the eligibility_trace function, when I use the batch from memory.sample_batch(128) and then put two values into the numpy array, but I do not know how this happens or how I can code around it… I hope you can help me with this one because I have been staring at this problem for 2 days…

It sounds like you’re doing a matrix multiply where the first matrix is 1 by 2 while the second matrix is 1 by 30. The shapes of these matrices are incompatible, since the inner dimensions (2 and 1) don’t match.

That error is from deep inside the PyTorch source code. The offending line in the model seems to be x = F.relu(self.fc1(state))

My guess is that Qnetwork was initialised with an input_size that doesn’t match the size of the state it receives.
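Here is a small sketch of that mismatch in isolation (assuming the 0.3-style Variable API from your traceback; the two numbers are just the values from your printout):

import torch
import torch.nn as nn
from torch.autograd import Variable

fc1 = nn.Linear(1, 30)  # same as the first layer of Qnetwork(1, 20): expects 1 feature per sample

flat = Variable(torch.Tensor([18.9, 19.0]))       # shape (2,)   -> treated as one sample with 2 features
batch = Variable(torch.Tensor([[18.9], [19.0]]))  # shape (2, 1) -> batch of 2 samples, 1 feature each

# fc1(flat)               # RuntimeError: size mismatch, m1: [1 x 2], m2: [1 x 30]
print(fc1(batch).size())  # works: torch.Size([2, 30])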

Going a little further…

input = Variable(torch.from_numpy(np.array([series[0].state, series[-1].state], dtype = np.float32)))

somehow produces a 1d tensor containing only 2 elements.

That must mean that series[0].state is being read out as a single number, so the two stacked states become one flat vector of 2 values instead of a batch of shape (2, 1), and fc1 (built with input_size = 1) then sees a single sample with 2 features.
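
If that is the case, one possible fix is to give the stacked states an explicit feature dimension before wrapping them in a Variable, so the network sees a batch of shape (2, input_size) rather than a flat vector. A sketch of how eligibility_trace could build its input (only the reshape is new, the rest is your code):

for series in batch:
    state_batch = np.array([series[0].state, series[-1].state], dtype = np.float32)
    state_batch = state_batch.reshape(-1, qnetwork.input_size)  # (2, 1) instead of a flat (2,)
    input = Variable(torch.from_numpy(state_batch))
    output = qnetwork(input)  # fc1 now sees 2 samples with 1 feature each
    ...

You would probably also want to check that the inputs list you return at the end of eligibility_trace ends up with the same (batch, 1) shape, since it is fed straight back into qnetwork during training.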