Backprop point multiplication weight matrix?

peepeepoopoo · October 8, 2020, 3:13am

I’m a little confused about how PyTorch keeps track of and updates a weight matrix (one that is point-wise multiplied with the input matrix). Should the weight matrix be fed to the network as an input, so that the user has to keep track of it manually after each update, or will it be tracked and updated automatically by the PyTorch library?

ptrblck · October 10, 2020, 10:30am

Generally you could use nn.Modules, which will keep the parameters as internal attributes, or use the functional API where you can keep track of all parameters manually.

I would recommend taking a look at some tutorials to see different workflows and use cases.

peepeepoopoo · October 14, 2020, 4:48pm

As a follow-up question (a slightly silly one at that), I was wondering how one can be sure that backpropagation isn’t broken. I checked the gradient of the weight matrix that I initialized in __init__ and was able to get a value after backward(); does that mean that backprop isn’t broken up to that point? And trying to print loss.backward() would give me None either way?

Sorry the last one sounds really dumb, just wanna make sure I’m not doing anything incorrectly.

here is a snippet of what I was testing out, seemed to work fine (also if things are correct other people in the future can reference this):

from __future__ import print_function
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch_geometric.nn import knn
from torchviz import make_dot
from skimage import io, transform



# Sizes shared by the toy example below.
channel_in = channel_out = 6   # feature width passed to the network constructor
neighbours = 2                 # neighbour count
err_val = 0
CHANNEL_MLP = channel_in       # alias of the input feature width




class MLP_2L(nn.Module):
    """Two linear layers followed by batch normalization and ReLU.

    Layout: Linear(channels -> channels/2) -> Linear(channels/2 -> channels)
    -> BatchNorm1d(channels) -> ReLU.

    NOTE(review): there is no activation between the two linear layers --
    confirm that this is intended.
    """

    def __init__(self, channels):
        super().__init__()
        full_width = int(channels)
        half_width = int(channels / 2)

        self.channel = channels
        # Sub-modules are created in the same order as before so that
        # parameter ordering and random initialisation stay unchanged.
        self.hidden = nn.Linear(full_width, half_width)
        self.output = nn.Linear(half_width, full_width)
        self.bn = nn.BatchNorm1d(channels)

    def forward(self, input):
        squeezed = self.hidden(input)
        restored = self.output(squeezed)
        normalised = self.bn(restored)
        return F.relu(normalised)
		



class testnet(nn.Module):
    """Toy network: an MLP followed by a point-wise learnable weight matrix.

    The forward pass sends the input through ``MLP_2L`` and multiplies the
    result element-wise (``torch.mul``) with the parameter ``Fk``.  ``Fk`` is
    stored as an ``nn.Parameter`` attribute, so it is part of
    ``parameters()`` and gets a ``.grad`` once ``backward()`` has been run on
    a loss computed from the output.

    Args:
        in_channel: Feature width of the input; also used as the width of
            ``Fk``.
        out_channel: Accepted for interface compatibility; not used.
    """

    def __init__(self, in_channel, out_channel):
        super().__init__()
        self.in_channel = in_channel

        ### mlp
        self.mlp = MLP_2L(in_channel)

        ### initialize parameterized weight matrix
        # The width follows in_channel (instead of a hard-coded 6) so the
        # product in forward() lines up with the MLP output for any channel
        # count.  The leading 3 is the fixed number of rows, so the input
        # batch has to be broadcast-compatible with it.
        # nn.Parameter already enables requires_grad.
        self.Fk = nn.Parameter(torch.randn(3, in_channel))

    def forward(self, test_features):
        """Return ``mlp(test_features) * Fk`` (element-wise).

        The intermediate tensors and their ``requires_grad`` flags are
        printed as a debugging aid.
        """
        conv_out = self.mlp(test_features)
        print(conv_out)
        print(conv_out.requires_grad)

        conv_out = torch.mul(conv_out, self.Fk)
        print(conv_out)
        print(conv_out.requires_grad)

        return conv_out






### variables
# Three samples with six features each.
raw_features = np.array([[0,0,0,0,0,0], [1,1,1,1,1,1], [2,2,2,2,2,2]])
features = torch.tensor(raw_features, dtype=torch.float, requires_grad=True)


### loss function
criterion = nn.MSELoss()


### initialize network
net = testnet(channel_in, channel_out)
for param in net.parameters():
    print(type(param.data), param.size())

print("\n\n")
# Print the instantiated module (its layer summary), not the class object.
print(net)


### send the input through the layer to get output
output = net(features)


## get loss
# Random regression target with the same shape as the output; the target
# itself does not need a gradient.
loss = criterion(output, torch.randn_like(output))
print('loss:')
print(loss)


### get grad
# backward() returns None; the gradients are stored on each parameter's .grad.
loss.backward()


print('Fk grad:')
print(net.Fk.grad)




















#### test pytorch with the proposed pcnn structure using an mlp, followed by batch normalization and relu

channel_in = channel_out = 6   # feature width passed to the network constructor
neighbours = 2                 # number of nearest neighbours passed to the network
err_val = 0
CHANNEL_MLP = channel_in       # alias of the input feature width








#### Two-layer MLP with batch normalization and ReLU.
## NOTE(review): the author describes the input as N x 3 x neighbours (or
## 3 x neighbours per channel) with an output of the same shape; the code
## itself only ties the feature width to `channels` -- confirm against callers.

class MLP_2L(nn.Module):
    """Two linear layers followed by batch normalization and ReLU.

    Layout: Linear(channels -> channels/2) -> Linear(channels/2 -> channels)
    -> BatchNorm1d(channels) -> ReLU.

    NOTE(review): there is no activation between the two linear layers --
    confirm that this is intended.
    """

    def __init__(self, channels):
        super().__init__()
        full_width = int(channels)
        half_width = int(channels / 2)

        self.channel = channels
        # Sub-modules are created in the same order as before so that
        # parameter ordering and random initialisation stay unchanged.
        self.hidden = nn.Linear(full_width, half_width)
        self.output = nn.Linear(half_width, full_width)
        self.bn = nn.BatchNorm1d(channels)

    def forward(self, input):
        squeezed = self.hidden(input)
        restored = self.output(squeezed)
        normalised = self.bn(restored)
        return F.relu(normalised)
		



class FUSE_SUB_DEPTHNET(nn.Module):
    """Gather k-nearest-neighbour features and run them through ``MLP_2L``.

    Args:
        in_channel: Feature width of ``test_feature``; passed to ``MLP_2L``.
        out_channel: Accepted for interface compatibility; not used.
        neighbours: Number of nearest neighbours requested from ``knn``.
    """

    def __init__(self, in_channel, out_channel, neighbours):
        super().__init__()

        ### input variables
        self.in_channel = in_channel
        self.neighbours = neighbours

        ### MLP layer
        self.mlp = MLP_2L(in_channel)

        ### initialize parameterized weight matrix
        # NOTE(review): Fk is registered as a parameter but forward() does
        # not use it yet, so it takes no part in the loss and will not
        # receive a gradient from backward().
        self.Fk = nn.Parameter(torch.randn(3, self.neighbours, self.in_channel))

    def forward(self, test_xyz, test_feature):
        """Return the MLP output for the neighbour features of every point.

        Args:
            test_xyz: Point coordinates, one row per point.
            test_feature: Per-point features, one row per point.

        Returns:
            One row of ``in_channel`` values per (point, neighbour) pair
            reported by ``knn``.
        """
        ### KNN and find the corresponding features and xyz
        assign_index = knn(test_xyz, test_xyz, self.neighbours)

        # knn returns a (2, E) tensor of index pairs: row 0 holds the query
        # index, row 1 the index of the matched neighbour.  Gather with the
        # neighbour row only -- indexing with the whole tensor stacks both
        # rows into a (2, E, C) result, and BatchNorm1d would then treat E as
        # the channel axis.
        neighbour_idx = assign_index[1]

        knn_neighbours = test_xyz[neighbour_idx]
        knn_features = test_feature[neighbour_idx]

        print(knn_neighbours.size())
        print(knn_neighbours)
        print(knn_neighbours.requires_grad)

        conv_out = self.mlp(knn_features)
        print(conv_out)
        print(conv_out.requires_grad)

        return conv_out







# Sample data.  NOTE: raw_x / x are defined but not passed to the network
# below; the coordinates that are actually used are raw_y / y.
raw_x = np.array([[-1, -1,1], [-1, 1,4], [1, -1,6], [1, 1,9], [20,3,5,], [-4,-8,-30],[-0.5,1,-4], [0.5,-2,-4.1],[1,-4,5],[6,9,30], [-4,6,5],[31,42,1],[4,64,13],[44,59,-103],[1,55,671]])
raw_y = np.array([[-1, 0, 1], [1, 0, 1], [0,0,0]])
raw_features = np.array([[0,0,0,0,0,0], [1,1,1,1,1,1], [2,2,2,2,2,2]])


x = torch.tensor(raw_x, dtype=torch.float, requires_grad=True)
y = torch.tensor(raw_y, dtype=torch.float, requires_grad=True)
features = torch.tensor(raw_features, dtype=torch.float, requires_grad=True)


### loss function
criterion = nn.MSELoss()


### initialize network
Depth_cnn = FUSE_SUB_DEPTHNET(channel_in, channel_out, neighbours)
for param in Depth_cnn.parameters():
    print(type(param.data), param.size())
print(Depth_cnn)


### send the input through the layer to get output
output = Depth_cnn(y, features)


## get loss
# Build the random target from the output's own shape so that MSELoss never
# has to broadcast mismatched shapes; the target does not need a gradient.
loss = criterion(output, torch.randn_like(output))
print('loss:')
print(loss)


### get grad
# backward() returns None; the gradients are stored on each parameter's .grad.
loss.backward()

print(Depth_cnn.mlp.hidden.bias.grad)

ptrblck · October 15, 2020, 2:15am

Yes, that’s correct.

Yes, tensor.backward() doesn’t return anything.