Here’s my code -
# Here we import all libraries
import numpy as np
import gym
import matplotlib.pyplot as plt
import os
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from collections import deque
env = gym.make("CliffWalking-v0")
#Hyperparameters
episodes = 5000
eps = 1.0
learning_rate = 0.1
discount_factor = 0.99
tot_rewards = []
decay_val = 0.001
mem_size = 50000
class NeuralNetwork(nn.Module):
def __init__(self, state_size, action_size):
super(NeuralNetwork, self).__init__()
self.state_size = state_size
self.action_size = action_size
# self.flatten = nn.Flatten()
self.linear_relu_stack = nn.Sequential(
nn.Linear(state_size, 30),
nn.ReLU(),
nn.Linear(30, 30),
nn.ReLU(),
nn.Linear(30, action_size)
)
def forward(self, x):
x = self.linear_relu_stack(x)
return x
model = NeuralNetwork(env.observation_space.n, env.action_space.n)
opt = torch.optim.Adam(params=model.parameters(), lr=learning_rate)
loss = nn.MSELoss()
replay_buffer = deque(maxlen=mem_size)
state = env.reset()
action = model(state)
While there are many SO posts on this topic, none of them solve the issue.