I’m a little confused about how PyTorch keeps track of and updates a weight matrix (one that is multiplied element-wise with the input matrix). Should the weight matrix be fed to the network as an input and tracked manually by the user after each update, or is it updated and tracked automatically by the PyTorch library?
Generally you could use nn.Modules, which will keep the parameters as internal attributes, or use the functional API, where you keep track of all parameters manually.
I would recommend taking a look at some tutorials to see different workflows and use cases.
As a follow-up question (a slightly silly one at that), I was wondering how one can be sure that backpropagation isn’t broken. I checked the gradient of the weight matrix that I initialized in __init__ and was able to get a value after backward(); does that mean that backprop isn’t broken up to that point? And would trying to print loss.backward() give me None either way?
Sorry the last one sounds really dumb, just wanna make sure I’m not doing anything incorrectly.
Here is a snippet of what I was testing out; it seemed to work fine (also, if things are correct, other people can reference this in the future):
from __future__ import print_function
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch_geometric.nn import knn
from torchviz import make_dot
from skimage import io, transform
# Settings for the first test snippet.
channel_in = 6  # passed to testnet(...) as in_channel
channel_out = 6  # passed to testnet(...) as out_channel
neighbours = 2  # not referenced by the first snippet shown here
err_val = 0  # not referenced by the code shown here
CHANNEL_MLP = channel_in  # alias of channel_in; not referenced by the code shown here
class MLP_2L(nn.Module):
    """Two stacked linear layers followed by batch normalisation and ReLU.

    The first linear layer squeezes the feature width from ``int(channels)``
    to ``int(channels / 2)``; the second restores it. No activation is
    applied between the two linear layers.
    """

    def __init__(self, channels):
        super().__init__()
        width = int(channels)
        bottleneck = int(channels / 2)
        self.channel = channels
        # Submodules are created in the order hidden -> output -> bn.
        self.hidden = nn.Linear(width, bottleneck)
        self.output = nn.Linear(bottleneck, width)
        self.bn = nn.BatchNorm1d(channels)

    def forward(self, input):
        """Return ``relu(bn(output(hidden(input))))``."""
        squeezed = self.hidden(input)
        restored = self.output(squeezed)
        normalised = self.bn(restored)
        return F.relu(normalised)
class testnet(nn.Module):
    """Toy network: an ``MLP_2L`` whose output is scaled element-wise by ``Fk``.

    ``Fk`` is registered as an ``nn.Parameter``, so it is returned by
    ``parameters()`` and receives a ``.grad`` after ``backward()``.

    Args:
        in_channel: feature width of the input; also the width of ``Fk``.
        out_channel: accepted for interface compatibility; not used.
    """

    def __init__(self, in_channel, out_channel):
        super().__init__()
        self.in_channel = in_channel
        ### mlp
        self.mlp = MLP_2L(in_channel)
        ### initialize parameterized weight matrix
        # The width follows in_channel so the element-wise product in
        # forward() lines up with the MLP output for any channel count.
        # nn.Parameter sets requires_grad=True itself, so the flag is not
        # repeated on the underlying tensor.
        # NOTE(review): the leading dimension of 3 is fixed; presumably it
        # matches a three-sample batch -- confirm before using other sizes.
        self.Fk = torch.nn.Parameter(torch.randn(3, int(in_channel)))

    def forward(self, test_features):
        """Run the MLP, then multiply element-wise by ``Fk``.

        The intermediate tensors and their ``requires_grad`` flags are
        printed so gradient tracking can be inspected at each step.
        """
        conv_out = self.mlp(test_features)
        print(conv_out)
        print(conv_out.requires_grad)
        conv_out = torch.mul(conv_out, self.Fk)
        print(conv_out)
        print(conv_out.requires_grad)
        return conv_out
### variables
# Three samples with six features each.
raw_features = np.array([[0, 0, 0, 0, 0, 0], [1, 1, 1, 1, 1, 1], [2, 2, 2, 2, 2, 2]])
features = torch.tensor(raw_features, dtype=torch.float, requires_grad=True)
### loss function
criterion = nn.MSELoss()
### initialize network
net = testnet(channel_in, channel_out)
# List every registered parameter; Fk appears here because it is an nn.Parameter.
for param in net.parameters():
    print(type(param.data), param.size())
print("\n\n")
# Print the instance (its submodule tree), not the class object.
print(net)
### send the input through the layer to get output
output = net(features)
## get loss
# A random target with the output's own shape; it is not being optimised,
# so it does not need requires_grad.
target = torch.randn_like(output)
loss = criterion(output, target)
print('loss:')
print(loss)
### get grad
# backward() returns None; gradients are stored on each parameter's .grad.
loss.backward()
print('Fk grad:')
print(net.Fk.grad)
#### test PyTorch with the proposed PCNN structure using an MLP, with batch normalization and ReLU afterward
# Settings for the second test snippet.
channel_in = 6  # passed to FUSE_SUB_DEPTHNET(...) as in_channel
channel_out = 6  # passed to FUSE_SUB_DEPTHNET(...) as out_channel
neighbours = 2  # passed to FUSE_SUB_DEPTHNET(...) as the knn neighbour count
err_val = 0  # not referenced by the code shown here
CHANNEL_MLP = channel_in  # alias of channel_in; not referenced by the code shown here
#### MLP that is fed 3 x neighbour_size values at each input (with channel_size inputs)
## input of N x 3 x neighbours (or 3 x neighbours per channel) to the MLP; returns N x 3 x neighbours
class MLP_2L(nn.Module):
    """Linear bottleneck (channels -> channels/2 -> channels), then BN + ReLU.

    Both linear layers are applied back to back with no activation in
    between; batch normalisation and ReLU are applied to the result.
    """

    def __init__(self, channels):
        super().__init__()
        self.channel = channels
        n_full, n_half = int(channels), int(channels / 2)
        # Creation order (hidden, output, bn) is kept so seeded
        # initialisation yields the same weights.
        self.hidden = nn.Linear(n_full, n_half)
        self.output = nn.Linear(n_half, n_full)
        self.bn = nn.BatchNorm1d(channels)

    def forward(self, input):
        """Apply hidden -> output -> batch norm -> ReLU and return the result."""
        linear_out = self.output(self.hidden(input))
        return F.relu(self.bn(linear_out))
class FUSE_SUB_DEPTHNET(nn.Module):
    """Gather k-nearest-neighbour features and pass them through ``MLP_2L``.

    Args:
        in_channel: feature width fed to the MLP.
        out_channel: accepted for interface compatibility; not used.
        neighbours: number of nearest neighbours requested from ``knn``.
    """

    def __init__(self, in_channel, out_channel, neighbours):
        super().__init__()
        ### input variables
        self.in_channel = in_channel
        self.neighbours = neighbours
        ### MLP layer
        self.mlp = MLP_2L(in_channel)
        ### initialize parameterized weight matrix
        # nn.Parameter sets requires_grad=True itself. Fk is registered but
        # not used by forward(), so its .grad stays None after backward().
        self.Fk = torch.nn.Parameter(
            torch.randn(3, self.neighbours, self.in_channel)
        )

    def forward(self, test_xyz, test_feature):
        """Return the MLP output for the neighbour features of every point.

        Intermediate tensors and their ``requires_grad`` flags are printed
        so gradient tracking can be inspected.
        """
        ### KNN and find the corresponding features and xyz
        assign_index = knn(test_xyz, test_xyz, self.neighbours)
        # knn returns a (2, E) index tensor: row 0 holds the query-point
        # index and row 1 the matched neighbour index. Only the neighbour
        # row is used for gathering; indexing with the whole (2, E) tensor
        # would stack query and neighbour rows into a (2, E, C) tensor.
        neighbour_idx = assign_index[1]
        knn_neighbours = test_xyz[neighbour_idx]
        knn_features = test_feature[neighbour_idx]
        print(knn_neighbours.size())
        print(knn_neighbours)
        print(knn_neighbours.requires_grad)
        conv_out = self.mlp(knn_features)
        print(conv_out)
        print(conv_out.requires_grad)
        return conv_out
# Sample point sets. Only y and the features are fed to the network below;
# x is built but not used in this snippet.
raw_x = np.array([
    [-1, -1, 1], [-1, 1, 4], [1, -1, 6], [1, 1, 9], [20, 3, 5],
    [-4, -8, -30], [-0.5, 1, -4], [0.5, -2, -4.1], [1, -4, 5], [6, 9, 30],
    [-4, 6, 5], [31, 42, 1], [4, 64, 13], [44, 59, -103], [1, 55, 671],
])
raw_y = np.array([[-1, 0, 1], [1, 0, 1], [0, 0, 0]])
raw_features = np.array([[0, 0, 0, 0, 0, 0], [1, 1, 1, 1, 1, 1], [2, 2, 2, 2, 2, 2]])
x = torch.tensor(raw_x, dtype=torch.float, requires_grad=True)
y = torch.tensor(raw_y, dtype=torch.float, requires_grad=True)
features = torch.tensor(raw_features, dtype=torch.float, requires_grad=True)
### loss function
criterion = nn.MSELoss()
### initialize network
Depth_cnn = FUSE_SUB_DEPTHNET(channel_in, channel_out, neighbours)
for param in Depth_cnn.parameters():
    print(type(param.data), param.size())
print(Depth_cnn)
### send the input through the layer to get output
output = Depth_cnn(y, features)
## get loss
# Sample the target with the output's own shape so MSELoss never has to
# broadcast; the target is not optimised, so it does not need requires_grad.
target = torch.randn_like(output)
loss = criterion(output, target)
print('loss:')
print(loss)
### get grad
# backward() returns None; a populated .grad on an early layer shows that
# gradients flowed back through the whole forward pass.
loss.backward()
print(Depth_cnn.mlp.hidden.bias.grad)
Yes, that’s correct.
Yes, tensor.backward() doesn’t return anything.