Thanks for the model definition. Your code works as expected and returns the same random values on every run once the seed is set:
python tmp.py
OrderedDict([('layer1.weight', tensor([[0.7645]], device='cuda:0')), ('layer1.bias', tensor([0.8300], device='cuda:0')), ('layer2.weight', tensor([[-0.2343]], device='cuda:0')), ('layer2.bias', tensor([0.9186], device='cuda:0'))])
python tmp.py
OrderedDict([('layer1.weight', tensor([[0.7645]], device='cuda:0')), ('layer1.bias', tensor([0.8300], device='cuda:0')), ('layer2.weight', tensor([[-0.2343]], device='cuda:0')), ('layer2.bias', tensor([0.9186], device='cuda:0'))])
python tmp.py
OrderedDict([('layer1.weight', tensor([[0.7645]], device='cuda:0')), ('layer1.bias', tensor([0.8300], device='cuda:0')), ('layer2.weight', tensor([[-0.2343]], device='cuda:0')), ('layer2.bias', tensor([0.9186], device='cuda:0'))])
cat tmp.py
import torch
import torch.nn as nn
class LinearRegression(nn.Module):
    """Two stacked single-feature linear layers.

    Subclassing ``nn.Module`` is what registers ``layer1`` and ``layer2``
    as sub-modules, so their weights and biases show up in
    ``state_dict()`` and follow the module on ``.to(device)``.

    There is no non-linearity between the layers, so the network is the
    composition ``layer2(layer1(x))``.
    """

    def __init__(self, *args, **kwargs) -> None:
        """Create both layers; extra arguments are forwarded to ``nn.Module``."""
        super().__init__(*args, **kwargs)
        # Parameters are randomly initialised here, at construction time.
        self.layer1 = nn.Linear(in_features=1, out_features=1, bias=True, dtype=torch.float32)
        self.layer2 = nn.Linear(in_features=1, out_features=1, bias=True, dtype=torch.float32)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Apply ``layer1`` then ``layer2`` to ``x``.

        Args:
            x: Input tensor whose last dimension has size 1, matching
                ``in_features=1`` of ``layer1``.

        Returns:
            The output of ``layer2`` applied to the output of ``layer1``.
        """
        # The intermediate activation is kept on the instance so it can be
        # inspected after a forward pass; it is overwritten on every call.
        self.forward1 = self.layer1(x)
        return self.layer2(self.forward1)
# Seed before the model is built: the layers draw their initial values in
# the constructor, which runs before the model is moved to `device`.
torch.manual_seed(42)

# Prefer the GPU when one is available, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

model_linear = LinearRegression()
model_linear.to(device=device)

# Show the (seeded) initial parameters.
print(model_linear.state_dict())
As previously explained:
The initial random values are created on the CPU before you move the model to the GPU, so you need to seed the host, too.