Hi,
I am trying to create a reinforcement learning algorithm for a Kaggle competition called ConnectX. My goal is to have a model that can play on any grid size. I am new to PyTorch and not sure whether everything I am doing is right. Below you will find the model, then the update procedure, followed by what I have already checked.
Model :
class DeepModel(nn.Module):
    """Fully convolutional per-column value network for a grid board.

    The network contains no layer whose size depends on the grid, so the
    same weights can be applied to any ``(n_rows, n_cols)`` input.

    Args:
        n_rows: Stored on the instance; not used by ``forward``, which reads
            the grid size from its input.
        n_cols: Stored on the instance; not used by ``forward``.
        n_heads: Accepted for interface compatibility; currently unused.
        d_model: Accepted for interface compatibility; currently unused.
        b: Accepted for interface compatibility; currently unused.
        dropout: Dropout probability applied after every activation.
        device: Accepted for interface compatibility; currently unused.
            Move the module with ``.to(device)`` instead.
        vmin: Lower bound of the rescaled output range.
        vmax: Upper bound of the rescaled output range.
    """

    def __init__(self, n_rows, n_cols, n_heads, d_model, b=5, dropout=0.1,
                 device=None, vmin=-21, vmax=21):
        super().__init__()
        self.n_rows = n_rows
        self.n_cols = n_cols
        # Output bounds, chosen a bit beyond the values the targets can take.
        self.vmin = vmin
        self.vmax = vmax
        self.dropout = nn.Dropout(dropout)
        # Two 3x3 views of the board: adjacent cells and cells two apart.
        # Both paddings keep the spatial size unchanged.
        self.conv0 = nn.Conv2d(1, 4, 3, padding=1, dilation=1)
        self.conv3 = nn.Conv2d(1, 4, 3, padding=2, dilation=2)
        # Per-cell MLP applied over the 8 concatenated conv channels.
        self.linear0 = nn.Linear(8, 4)
        self.linear1 = nn.Linear(4, 1)

    @staticmethod
    def _landing_mask(x):
        """Return, per cell, occupancy of the next row minus its own.

        ``x`` is ``(batch, n_rows, n_cols)``; cells ``> 0`` count as
        occupied. A virtual fully occupied row is appended after the last
        row, so an empty cell in the last row gets weight 1.
        """
        # Stay on x's device: no CPU round-trip for accelerator inputs.
        occupied = (x > 0).to(torch.float32)
        floor = occupied.new_ones((x.size(0), 1, x.size(2)))
        next_row = torch.cat([occupied[:, 1:, :], floor], dim=1)
        return next_row - occupied

    def forward(self, x):
        """Compute one bounded value per column.

        Args:
            x: Tensor of shape ``(n_rows, n_cols, batch)``; the batch axis
                is last.

        Returns:
            Tensor of shape ``(n_cols, batch)`` with values inside
            ``(vmin, vmax)``.
        """
        # (n_rows, n_cols, batch) -> (batch, n_rows, n_cols)
        x = x.permute(2, 0, 1).contiguous()
        mask = self._landing_mask(x)

        # Add the channel dimension expected by Conv2d.
        x = x.unsqueeze(1)
        plain = self.dropout(F.leaky_relu(self.conv0(x)))
        dilated = self.dropout(F.leaky_relu(self.conv3(x)))

        # Channels last so the linear layers act on each cell independently.
        cells = torch.cat([plain, dilated], dim=1).permute(0, 2, 3, 1)
        cells = self.dropout(F.leaky_relu(self.linear0(cells)))
        # NOTE(review): activation + dropout on the final score is unusual
        # for a value head - confirm it is intended.
        cells = self.dropout(F.leaky_relu(self.linear1(cells))).squeeze(3)

        # Collapse the row axis, keeping only the masked cells per column.
        column_scores = (cells * mask).sum(dim=1)

        # Squash into (vmin, vmax) to help the model converge.
        column_scores = (
            torch.sigmoid(column_scores) * (self.vmax - self.vmin) + self.vmin
        )
        return column_scores.T
Update procedure :
# Clear gradients left over from any previous backward pass.
optimizer.zero_grad()
# NOTE(review): selected_action_values presumably comes from a forward pass of
# the model with autograd enabled, and actual_values is presumably the target
# - neither is shown here; confirm against the calling code.
loss = criterion(selected_action_values, actual_values)
# Backpropagate the loss to populate .grad on the parameters in its graph.
loss.backward()
# Apply one update to the parameters registered with the optimizer.
# NOTE(review): the optimizer's construction is not shown - verify it was built
# from TrainNet.model.parameters() of this same model instance.
optimizer.step()
Things I checked:
# True only shows that a .grad tensor exists on the first parameter; it can still be all zeros.
list(TrainNet.model.parameters())[0].grad is not None
> True
# Per-layer summary for a (5, 5) input; `summary` is presumably torchsummary's - TODO confirm.
summary(TrainNet.model, (5,5))
>
----------------------------------------------------------------
Layer (type) Output Shape Param #
================================================================
Conv2d-1 [-1, 4, 2, 5] 40
Dropout-2 [-1, 4, 2, 5] 0
Conv2d-3 [-1, 4, 2, 5] 40
Dropout-4 [-1, 4, 2, 5] 0
Linear-5 [-1, 2, 5, 4] 36
Dropout-6 [-1, 2, 5, 4] 0
Linear-7 [-1, 2, 5, 1] 5
Dropout-8 [-1, 2, 5, 1] 0
================================================================
Total params: 121
Trainable params: 121
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.00
Estimated Total Size (MB): 0.00
----------------------------------------------------------------
Here the parameters remain unchanged whatever I do:
# Dump the current value of every trainable parameter.
for param_name, tensor in TrainNet.model.named_parameters():
    if not tensor.requires_grad:
        continue
    print(param_name, tensor.data)
>
conv0.weight tensor([[[[-0.3143, 0.1387, 0.1515],
[ 0.0487, -0.1371, -0.0142],
[ 0.1233, -0.1197, -0.2649]]],
[[[ 0.2651, -0.2724, -0.3084],
[-0.1246, 0.2586, -0.1759],
[ 0.0032, -0.0672, 0.2653]]],
[[[-0.1949, 0.2969, 0.0717],
[ 0.2794, 0.1633, 0.0615],
[-0.1397, -0.1555, -0.2735]]],
[[[-0.2022, 0.3160, 0.2486],
[-0.2864, 0.0279, 0.3323],
[ 0.0386, -0.2256, 0.1192]]]])
conv0.bias tensor([-0.2692, 0.2202, 0.3285, 0.1761])
conv3.weight tensor([[[[ 0.0099, -0.0905, 0.0814],
[-0.2367, -0.2163, 0.0789],
[ 0.1303, -0.2962, 0.1005]]],
[[[-0.0794, -0.1931, -0.1032],
[ 0.0772, 0.3046, -0.2039],
[ 0.0582, -0.2211, 0.1374]]],
[[[ 0.0463, -0.2670, 0.1044],
[-0.0872, 0.3231, -0.3184],
[-0.2429, 0.0073, 0.1560]]],
[[[-0.2607, 0.1298, 0.2120],
[-0.0643, 0.0509, -0.0249],
[ 0.3145, -0.3326, -0.3263]]]])
conv3.bias tensor([-0.1426, 0.2362, -0.0571, -0.1626])
linear0.weight tensor([[-0.2452, -0.2260, -0.1418, -0.2996, -0.3340, 0.0125, 0.1750, -0.1357],
[ 0.1063, -0.3165, -0.1286, 0.3160, 0.2921, -0.2550, -0.0953, -0.2631],
[ 0.0574, -0.3489, 0.0014, -0.1107, -0.0318, -0.3257, -0.2284, -0.0762],
[-0.1969, -0.2740, -0.2867, -0.2230, -0.3325, -0.3131, -0.0557, 0.2342]])
linear0.bias tensor([ 0.1925, -0.1622, -0.0316, -0.1191])
linear1.weight tensor([[ 0.1024, 0.3509, -0.1243, 0.4008]])
linear1.bias tensor([-0.3065])
I am most likely doing something silly that I cannot see, and I would appreciate some help.