Hi,

After initializing a simple model and moving it to a CUDA device, I see its weights change randomly between successive reads, even though no forward pass, backward pass, or optimizer step is run. This doesn’t happen when using the CPU device.

Running on Ubuntu 20.04 LTS and 22.04 LTS (64-bit).

NVIDIA GeForce RTX 2070

Python 3.8 & 3.10.6

torch version ‘2.0.0+cu117’ (from pip)

Any ideas how to solve this issue?

Code to reproduce:

```
from typing import Tuple
import torch
from torch import nn
class UNetTiny(nn.Module):
    """Small encoder/decoder CNN built from strided and transposed convolutions.

    The encoder is a chain of 3x3, stride-2 ``Conv2d`` layers (replicate
    padding), one per entry of ``num_block_filters``. The decoder mirrors it
    with 3x3, stride-2 ``ConvTranspose2d`` layers that walk the filter counts
    back down, followed by a final transposed convolution that produces
    ``out_channels``. ReLU is applied after every layer except the last.
    There are no skip connections between encoder and decoder.

    Args:
        in_channels: Number of channels of the input tensor.
        out_channels: Number of channels of the returned tensor.
        num_block_filters: Output channel count of each encoder stage, in
            order. Must contain at least one entry (an empty sequence raises
            ``IndexError``).
    """

    def __init__(self, in_channels: int, out_channels: int, num_block_filters: Tuple[int, ...]):
        super().__init__()
        padding_mode = 'replicate'

        # Encoder: (in_channels -> f0), (f0 -> f1), ... each stage uses stride 2.
        encoder_channels = (in_channels, *num_block_filters)
        self._down_modules = nn.ModuleList([
            nn.Conv2d(
                in_channels=c_in,
                out_channels=c_out,
                kernel_size=3,
                stride=2,
                padding=1,
                padding_mode=padding_mode,
            )
            for c_in, c_out in zip(encoder_channels, encoder_channels[1:])
        ])

        # Decoder: (f_last -> f_last-1), ..., (f1 -> f0). With kernel 3,
        # stride 2, padding 1 and output_padding 1 each stage doubles the
        # spatial size.
        decoder_channels = tuple(reversed(num_block_filters))
        self._up_deconv_modules = nn.ModuleList([
            torch.nn.ConvTranspose2d(
                in_channels=c_in,
                out_channels=c_out,
                output_padding=1,
                padding=1,
                kernel_size=3,
                stride=2,
            )
            for c_in, c_out in zip(decoder_channels, decoder_channels[1:])
        ])

        # Final projection from the first filter count to the requested output channels.
        self._out_deconv = torch.nn.ConvTranspose2d(
            in_channels=num_block_filters[0],
            out_channels=out_channels,
            output_padding=1,
            padding=1,
            kernel_size=3,
            stride=2,
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run the encoder, then the decoder, and return the un-activated output."""
        for down in self._down_modules:
            x = torch.nn.functional.relu(down(x))
        for up in self._up_deconv_modules:
            x = torch.nn.functional.relu(up(x))
        # No activation on the last layer.
        return self._out_deconv(x)
def _analyze_weights(unet_model):
    """Print every convolution parameter whose largest absolute value exceeds 1.0.

    Walks ``unet_model.named_modules()`` and inspects the ``weight`` and
    ``bias`` of each ``Conv2d`` / ``ConvTranspose2d`` (subclasses included).
    For each parameter above the threshold one line is printed in the form
    ``"<module name> <weight|bias>::\\t<max abs value>"``.

    Args:
        unet_model: Any ``torch.nn.Module``; modules that are not 2D
            (transposed) convolutions are ignored.
    """
    conv_types = (torch.nn.Conv2d, torch.nn.ConvTranspose2d)
    for name, module in unet_model.named_modules():
        if not isinstance(module, conv_types):
            continue
        for label, param in (('weight', module.weight), ('bias', module.bias)):
            # Layers created with bias=False expose bias as None.
            if param is None:
                continue
            # Read the value once so the number printed is the one that was checked.
            max_abs = param.detach().abs().max().item()
            if max_abs > 1.0:
                print(f'{name} {label}::\t{max_abs}')
def _check_memory_corruption():
    """Build a ``UNetTiny``, move it to the selected device and inspect its weights repeatedly.

    No forward pass, backward pass or optimizer step is executed here, so the
    parameters are expected to stay constant across iterations. Any line that
    ``_analyze_weights`` prints between the iteration headers reports a
    parameter whose magnitude exceeded that function's threshold.
    """
    # Use the first CUDA device when one is available; substitute
    # torch.device('cpu') here to compare against a CPU-only run.
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print('Using device: ', device)

    num_inspections = 100
    filters_per_stage = (64, 128, 256)

    unet = UNetTiny(in_channels=1, out_channels=3, num_block_filters=filters_per_stage)
    unet.to(device=device)
    unet.train()

    for iteration in range(num_inspections):
        print(f'weights on iteration {iteration}')
        _analyze_weights(unet)
# Script entry point: run the diagnostic only when executed directly, not on import.
if __name__ == "__main__":
    _check_memory_corruption()
```

The output I’m getting:

Using device: cuda:0

weights on iteration 0

weights on iteration 1

weights on iteration 2

weights on iteration 3

weights on iteration 4

weights on iteration 5

weights on iteration 6

weights on iteration 7

weights on iteration 8

weights on iteration 9

weights on iteration 10

weights on iteration 11

weights on iteration 12

weights on iteration 13

weights on iteration 14

weights on iteration 15

weights on iteration 16

weights on iteration 17

_up_deconv_modules.0 weight:: 254.71319580078125

weights on iteration 18

weights on iteration 19

weights on iteration 20

weights on iteration 21

weights on iteration 22

weights on iteration 23

_down_modules.2 weight:: 1815.224853515625

weights on iteration 24

weights on iteration 25

weights on iteration 26

weights on iteration 27

_down_modules.2 weight:: 1815.224853515625

weights on iteration 28

_down_modules.2 weight:: 1166.347900390625

weights on iteration 29

weights on iteration 30

weights on iteration 31

weights on iteration 32

weights on iteration 33

weights on iteration 34

weights on iteration 35

weights on iteration 36

weights on iteration 37

weights on iteration 38

weights on iteration 39

weights on iteration 40

weights on iteration 41

weights on iteration 42

weights on iteration 43

_up_deconv_modules.0 weight:: 249.4268798828125

weights on iteration 44

weights on iteration 45

weights on iteration 46

weights on iteration 47

weights on iteration 48

weights on iteration 49

weights on iteration 50

weights on iteration 51

_up_deconv_modules.0 weight:: 457.2268981933594

weights on iteration 52

weights on iteration 53

weights on iteration 54

weights on iteration 55

_down_modules.2 weight:: 427.8851318359375

weights on iteration 56

weights on iteration 57

weights on iteration 58

weights on iteration 59

weights on iteration 60

weights on iteration 61

weights on iteration 62

_down_modules.2 weight:: 1423.19970703125

weights on iteration 63

weights on iteration 64

weights on iteration 65

weights on iteration 66

weights on iteration 67

weights on iteration 68

weights on iteration 69

_up_deconv_modules.0 weight:: 254.71319580078125

weights on iteration 70

weights on iteration 71

weights on iteration 72

weights on iteration 73

weights on iteration 74

weights on iteration 75

weights on iteration 76

weights on iteration 77

weights on iteration 78

weights on iteration 79

weights on iteration 80

weights on iteration 81

weights on iteration 82

weights on iteration 83

weights on iteration 84

weights on iteration 85

weights on iteration 86

weights on iteration 87

weights on iteration 88

weights on iteration 89

weights on iteration 90

weights on iteration 91

_down_modules.2 weight:: 1815.224853515625

weights on iteration 92

weights on iteration 93

weights on iteration 94

weights on iteration 95

weights on iteration 96

weights on iteration 97

weights on iteration 98

weights on iteration 99

Process finished with exit code 0