Hello there,
I have tried to use some existing code and I get the same error as stated in the title, which can also be seen below. I get my state from my environment, which is Simulink, because we are doing a pump project for the biggest circulator pump manufacturer in the world.
output:
action output <class 'numpy.ndarray'>
hej
eli <class 'torch.autograd.variable.Variable'> value Variable containing:
18.9000
19.0000
[torch.FloatTensor of size 2]
Traceback (most recent call last):
  File "ai_eligibility.py", line 139, in <module>
    inputs, targets = eligibility_trace(batch)
  File "ai_eligibility.py", line 104, in eligibility_trace
    output = qnetwork(input)
  File "C:\Users\koch\Anaconda3\envs\py36_pytorch_kivy\lib\site-packages\torch\nn\modules\module.py", line 325, in __call__
    result = self.forward(*input, **kwargs)
  File "ai_eligibility.py", line 47, in forward
    x = F.relu(self.fc1(state))
  File "C:\Users\koch\Anaconda3\envs\py36_pytorch_kivy\lib\site-packages\torch\nn\modules\module.py", line 325, in __call__
    result = self.forward(*input, **kwargs)
  File "C:\Users\koch\Anaconda3\envs\py36_pytorch_kivy\lib\site-packages\torch\nn\modules\linear.py", line 55, in forward
    return F.linear(input, self.weight, self.bias)
  File "C:\Users\koch\Anaconda3\envs\py36_pytorch_kivy\lib\site-packages\torch\nn\functional.py", line 837, in linear
    output = input.matmul(weight.t())
  File "C:\Users\koch\Anaconda3\envs\py36_pytorch_kivy\lib\site-packages\torch\autograd\variable.py", line 386, in matmul
    return torch.matmul(self, other)
  File "C:\Users\koch\Anaconda3\envs\py36_pytorch_kivy\lib\site-packages\torch\functional.py", line 168, in matmul
    return torch.mm(tensor1.unsqueeze(0), tensor2).squeeze_(0)
RuntimeError: size mismatch, m1: [1 x 2], m2: [1 x 30] at c:\anaconda2\conda-bld\pytorch_1513133520683\work\torch\lib\th\generic/THTensorMath.c:1416
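For what it's worth, I can reproduce the exact same size mismatch with nothing but a Linear layer of the same size and a 1-D input of length 2, so I think it comes down to the shape of the tensor that goes into fc1. This is just my own minimal test; the layer size is copied from my network:

import torch
import torch.nn as nn
from torch.autograd import Variable

fc1 = nn.Linear(1, 30)  # same shape as self.fc1 in my Qnetwork(1, 20)

# a 1-D Variable of size 2, like the one my eligibility_trace prints
x = Variable(torch.FloatTensor([18.9, 19.0]))
try:
    fc1(x)  # raises: size mismatch, m1: [1 x 2], m2: [1 x 30]
except RuntimeError as e:
    print(e)

# a [2 x 1] batch (2 states, 1 feature each) goes through without an error
print(fc1(x.view(-1, 1)))  # Variable of size [2 x 30]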
This is my AI code:
# AI for pump

# Importing the libraries
import numpy as np
import random
import torch
import math
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable

# Importing the other Python files
import experience_replay_4_eligibility
import env

# Part 1 - Building the AI

# Making the brain

# Creating the architecture of the Neural Network
class Qnetwork(nn.Module): # inheriting from nn.Module

    def __init__(self, input_size, nb_action):
        super(Qnetwork, self).__init__()
        # Input and output neurons
        self.input_size = input_size
        self.nb_action = nb_action
        self.fc1 = nn.Linear(input_size, 30)
        self.fc2 = nn.Linear(30, nb_action) # 30 neurons in the hidden layer

    # Function that activates the neurons and performs forward propagation
    def forward(self, state):
        x = F.relu(self.fc1(state))
        q_values = self.fc2(x)
        return q_values

# Making the body
class SoftmaxBody(nn.Module):

    def __init__(self, T):
        super(SoftmaxBody, self).__init__()
        self.T = T

    def forward(self, outputs):
        probs = F.softmax(outputs * self.T)
        actions = probs.multinomial()
        return actions

# Making the AI
class AI:

    def __init__(self, brain, body):
        self.brain = brain
        self.body = body

    def __call__(self, inputs):
        input = Variable(torch.from_numpy(np.array(inputs, dtype = np.float32)))
        output = self.brain(input)
        actions = self.body(output)
        return actions.data.numpy()
# Part 2 - Training the AI with Deep Convolutional Q-Learning

# Getting the Simulink environment
# Creating the connection for the sender and receiver sockets
env_simu = env.environment()
env_simu.createServerSockets()

# Building an AI
qnetwork = Qnetwork(1, 20)
softmax_body = SoftmaxBody(T = 1.0)
ai = AI(brain = qnetwork, body = softmax_body)
env_simu.sendAction(0)

# Setting up Experience Replay
n_steps = experience_replay_4_eligibility.NStepProgress(env = env_simu, ai = ai, n_step = 10)
memory = experience_replay_4_eligibility.ReplayMemory(n_steps = n_steps, capacity = 10000)

# Implementing Eligibility Trace
def eligibility_trace(batch):
    gamma = 0.99
    inputs = []
    targets = []
    for series in batch:
        input = Variable(torch.from_numpy(np.array([series[0].state, series[-1].state], dtype = np.float32)))
        print('eli', type(input), 'value', input)
        output = qnetwork(input)
        cumul_reward = 0.0 if series[-1].done else output[1].data.max()
        for step in reversed(series[:-1]):
            cumul_reward = step.reward + gamma * cumul_reward
        state = series[0].state
        target = output[0].data
        target[series[0].action] = cumul_reward
        inputs.append(state)
        targets.append(target)
    return torch.from_numpy(np.array(inputs, dtype = np.float32)), torch.stack(targets)

# Making the moving average over 100 steps
class MA:

    def __init__(self, size):
        self.list_of_rewards = []
        self.size = size

    def add(self, rewards):
        if isinstance(rewards, list):
            self.list_of_rewards += rewards
        else:
            self.list_of_rewards.append(rewards)
        while len(self.list_of_rewards) > self.size:
            del self.list_of_rewards[0]

    def average(self):
        return np.mean(self.list_of_rewards)

ma = MA(100)

# Training the AI
loss = nn.MSELoss()
optimizer = optim.Adam(qnetwork.parameters(), lr = 0.001)
nb_epochs = 100
for epoch in range(1, nb_epochs + 1):
    memory.run_steps(200)
    for batch in memory.sample_batch(128):
        print('hej')
        inputs, targets = eligibility_trace(batch)
        inputs, targets = Variable(inputs), Variable(targets)
        predictions = qnetwork(inputs)
        loss_error = loss(predictions, targets)
        optimizer.zero_grad()
        loss_error.backward()
        optimizer.step()
    rewards_steps = n_steps.rewards_steps()
    ma.add(rewards_steps)
    avg_reward = ma.average()
    print("Epoch: %s, Average Reward: %s" % (str(epoch), str(avg_reward)))
    if avg_reward >= 1500:
        print("Congratulations, your AI wins")
        break
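For reference, the network itself accepts a [batch x input_size] tensor without complaints, so as far as I can tell it is only the input built inside eligibility_trace that has the wrong shape. A quick check (not part of my training code) would be:

test_input = Variable(torch.zeros(5, 1))  # a dummy batch of 5 states, input_size = 1
print(qnetwork(test_input))  # Variable of size [5 x 20], no error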
And this is my experience replay code:
# Experience Replay

# Importing the libraries
import numpy as np
from collections import namedtuple, deque
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable

# Defining one Step
Step = namedtuple('Step', ['state', 'action', 'reward'])

# Making the AI progress over several (n_step) steps
last_distance = 0

class NStepProgress:

    def __init__(self, env, ai, n_step):
        self.ai = ai
        self.rewards = []
        self.env = env
        self.n_step = n_step

    def __iter__(self):
        global last_distance
        goalT1 = 20
        state = self.env.receiveState()
        state = state[0]
        print('staaaate', state)
        state = [state]
        state = torch.Tensor(state).float().unsqueeze(0)
        history = deque()
        reward = 0.0
        print('reward', type(reward))
        print('state', type(state))
        while True: # REMEMBER to convert that damn state to a tensor or something
            print('state', type(np.array(state)), 'value', np.array(state))
            action = self.ai(np.array(state))
            print('action output', type(action))
            action = int(action[0][0])
            self.env.sendAction(action + 1)
            next_state = self.env.receiveState()
            next_state = next_state[0]
            distance = abs(goalT1 - next_state)
            #next_state, r, is_done, _ = self.env.step(action)
            # Reward policy
            if 0 <= distance <= 0.2:
                r = 1
                if distance < last_distance:
                    r = r * 0.5
            elif distance < last_distance:
                r = 0.1
            else:
                r = -0.5 * distance
            reward += r
            history.append(Step(state = state, action = action, reward = r))
            while len(history) > self.n_step + 1:
                history.popleft()
            if len(history) == self.n_step + 1:
                yield tuple(history)
            next_state = [next_state]
            next_state = torch.Tensor(next_state).float().unsqueeze(0)
            state = next_state
            self.rewards.append(reward)
            if r == 1:
                if len(history) > self.n_step + 1:
                    history.popleft()
                while len(history) >= 1:
                    yield tuple(history)
                    history.popleft()
                self.rewards.append(reward)
                reward = 0.0
                state = self.env.receiveState()
                state = state[0]
                state = [state]
                state = torch.Tensor(state).float().unsqueeze(0)
                history.clear()

    def rewards_steps(self):
        rewards_steps = self.rewards
        self.rewards = []
        return rewards_steps

# Implementing Experience Replay
class ReplayMemory:

    def __init__(self, n_steps, capacity = 10000):
        self.capacity = capacity
        self.n_steps = n_steps
        self.n_steps_iter = iter(n_steps)
        self.buffer = deque()

    def sample_batch(self, batch_size): # creates an iterator that returns random batches
        ofs = 0
        vals = list(self.buffer)
        np.random.shuffle(vals)
        while (ofs + 1) * batch_size <= len(self.buffer):
            yield vals[ofs * batch_size:(ofs + 1) * batch_size]
            ofs += 1

    def run_steps(self, samples):
        while samples > 0:
            entry = next(self.n_steps_iter) # 10 consecutive steps
            self.buffer.append(entry) # we put 200 in for the current epoch
            samples -= 1
        while len(self.buffer) > self.capacity: # we accumulate no more than the capacity (10000)
            self.buffer.popleft()
I know the problem is in the eligibility_trace function, where I use the batch from memory.sample_batch(128) and put two values into the numpy array, but I do not know how this happens or how I can code around it… I have been staring at this problem for 2 days now, so I hoped you could help me with this one…
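The only idea I have come up with myself is to force an explicit feature dimension onto the two stacked states before the forward pass, something like the lines below instead of the current ones in eligibility_trace. I have not verified that the rest of the function is still correct after this, so it is just a guess:

# reshape the two stacked states to [2 x input_size] before the forward pass
input = Variable(torch.from_numpy(np.array([series[0].state, series[-1].state], dtype = np.float32)).view(-1, 1))
output = qnetwork(input)  # output should then have size [2 x 20]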