import gym
import collections#
import random
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
#Hyperparameters
lr_mu = 0.0005
lr_q = 0.001
gamma = 0.99
buffer_limit = 50000
batch_size = 32
tau = 0.005
class ReplayBuffer():
def __init__(self):
self.buffer = collections.deque(maxlen=buffer_limit)
def put(self, data):
self.buffer.append(data)
def sample(self, n):
mini_batch = random.sample(self.buffer, n)
s_lst, a_lst, r_lst, s_prime_lst, done_mask_lst = [], [], [], [], []
for transition in mini_batch:
s, a, r, s_prime, done_mask = transition
s_lst.append(s)
a_lst.append([a])
r_lst.append([r])
s_prime_lst.append(s_prime)
done_mask_lst.append([done_mask])
return torch.tensor(s_lst, dtype=torch.float), torch.tensor(a_lst), \
torch.tensor(r_lst), torch.tensor(s_prime_lst, dtype=torch.float), \
torch.tensor(done_mask_lst)
def size(self):
return len(self.buffer)
class Qnet(nn.Module):
def __init__(self):
super(Qnet, self).__init__()
self.fc_s = nn.Linear(3, 128)
self.fc_a = nn.Linear(128, 64)
self.fc_q = nn.Linear(64,64)
self.fc_3 = nn.Linear(64, 1)
def forward(self, a):
h1 = F.relu(self.fc_s(a))
h2 = F.relu(self.fc_a(a))
cat = torch.cat([h1,h2],dim=1)
q = F.relu(self.fc_q(cat))
q = self.fc_3(q)
return q
class Munet(nn.Module):
def __init__(self):
super(Munet, self).__init__()
self.fc1 = nn.Linear(3, 128)
self.fc2 = nn.Linear(128, 64)
self.fc_mu = nn.Linear(64, 1)
def forward(self, x):
x = F.relu(self.fc1(x))
x = F.relu(self.fc2(x))
mu = torch.tanh(self.fc_mu(x))*2
return mu
def train(mu,mu_target,q,q_target,memory,q_optimizer,mu_optimizer):
s,a,r,s_prime,done_mask = memory.sample(batch_size)
td_target = r+gamma*q_target(s_prime,mu_target(s_prime))
q_loss = F.smooth_l1_loss(q(s,a), td_target.detach())
q_optimizer.zero_grad()
q_loss.backward()
q_optimizer.step()
mu_loss = -q(s,mu(s)).mean()
mu_optimizer.zero_grad()
mu_loss.backward()
mu_optimizer.step()
def main():
env = gym.make('Pendulum-v0')
memory = ReplayBuffer()
q = Qnet()
q_target = Qnet()
q_target.load_state_dict(q.state_dict())
mu =Munet()
mu_target = Munet()
mu_target.load_state_dict(mu.state_dict())
print_interval = 20
score = 0.0
min_play_reward = 0
iterations = 10000
q_optimizer = optim.Adam(q.parameters(), lr=lr_q)
mu_optimizer = optim.Adam(mu.parameters(), lr=lr_mu)
for interation in range(iterations):
s = env.reset()
for t in range(300):
a = mu(torch.from_numpy(s).float())
a = a.item()
s_prime, r, done, info = env.step([a])
memory.put((s,a,r/100.0,s_prime,done))
score +=r
s = s_prime
if memory.size()>2000:
for i in range(10):
train(mu, mu_target, q, q_target, memory, q_optimizer,mu_optimizer)
soft_update(mu,mu_target)
soft_update(q,q_target)
def soft_update(net,net_target):
for param_target, param in zim(net_target.parametes(),\
net.parameters()):
param_target.data.copy_(param_target.data*(1.0-tau)+\
param.data(tau))
env.close()
if name == āmainā:
main()
It is my code of Pendulum project. it has some error
"forward() takes 2 positional arguments but 3 were given "
As far as iām concerned, i think that cause of its is "class Qnet(nn.Module): ,class Munet(nn.Module): "
How can i fix the code?
Thank you