RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.FloatTensor [64, 64, 1, 1]], which is output 0 of ReluBackward0, is at version 1; expected version 0 instead

Hi, I am facing this problem and I don't know how to solve it.

import pickle
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import torchvision.datasets as datasets
import numpy as np
import pandas as pd
import sys
import json
import os
import sklearn.metrics as metrics
from watermark_regularizer import WatermarkRegularizer
from watermark_regularizer import get_wmark_regularizer
from resnet import ResNet

# set device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

RESULT_PATH = './result'
MODEL_CHKPOINT_FNAME = os.path.join(RESULT_PATH, 'WRN-Weights.pth')

def update_hdf5(fname, path, data):
    store = pd.HDFStore(fname)
    if path in store.keys():
        store.remove(path)
    store.append(path, data)
    store.close()

def save_wmark_signatures(prefix, model):
    for layer_id, wmark_regularizer in get_wmark_regularizer(model):
        fname_w = prefix + '_layer{}_w.npy'.format(layer_id)
        fname_b = prefix + '_layer{}_b.npy'.format(layer_id)
        np.save(fname_w, wmark_regularizer.get_matrix())
        np.save(fname_b, wmark_regularizer.get_signature())

lr_schedule = [60, 120, 160]

def schedule(epoch_idx):
    if (epoch_idx + 1) < lr_schedule[0]:
        return 0.1
    elif (epoch_idx + 1) < lr_schedule[1]:
        return 0.02
    elif (epoch_idx + 1) < lr_schedule[2]:
        return 0.004
    return 0.0008

if __name__ == '__main__':
    if len(sys.argv) < 2:
        print("Fatal: You forgot to include the settings file on the command line.")
        print("Usage: python %s <settings.json>" % sys.argv[0])
        sys.exit(1)
    settings_json_fname = sys.argv[1]
    train_settings = json.load(open(settings_json_fname))

    if not os.path.isdir(RESULT_PATH):
        os.makedirs(RESULT_PATH)

    # anomaly detection makes the backward-pass error point at the forward op that created the tensor
    torch.autograd.set_detect_anomaly(True)

    # load dataset and fitting data for learning
    if train_settings['dataset'] == 'cifar10':
        transform_train = transforms.Compose([transforms.RandomCrop(32, padding=4),
                                              transforms.RandomHorizontalFlip(),
                                              transforms.ToTensor(),
                                              transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])])
        transform_test = transforms.Compose([transforms.ToTensor(),
                                             transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])])

        train_set = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train)
        test_set = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test)

        train_loader = DataLoader(train_set, batch_size=train_settings['batch_size'], shuffle=True)
        test_loader = DataLoader(test_set, batch_size=train_settings['batch_size'], shuffle=False)
        nb_classes = 10
    else:
        print('not supported dataset "{}"'.format(train_settings['dataset']))
        exit(1)

    if 'replace_train_y' in train_settings and len(train_settings['replace_train_y']) > 0:
        print('train labels were replaced from "{}"'.format(train_settings['replace_train_y']))
        # CIFAR10 keeps its labels in `targets`; assumes the file stores one integer label per sample
        train_set.targets = np.load(train_settings['replace_train_y']).tolist()

    # read parameters
    batch_size = train_settings['batch_size']
    nb_epoch = train_settings['epoch']
    scale = train_settings['scale']
    embed_dim = train_settings['embed_dim']
    N = train_settings['N']
    k = train_settings['k']

    target_blk_id = train_settings['target_blk_id']
    base_modelw_fname = train_settings['base_modelw_fname']
    wtype = train_settings['wmark_wtype']
    randseed = train_settings['randseed'] if 'randseed' in train_settings else 'none'
    ohist_fname = train_settings['history']
    hist_hdf_path = 'WTYPE_{}/DIM{}/SCALE{}/N{}K{}B{}EPOCH{}/TBLK{}'.format(wtype, embed_dim, scale, N, k, batch_size, nb_epoch, target_blk_id)
    modelname_prefix = os.path.join(RESULT_PATH, 'wrn_' + hist_hdf_path.replace('/', '_'))

    # initialize process for watermark
    # note: the regularizer still has to be attached to a layer via set_param() and applied to the loss
    b = np.ones((1, embed_dim))
    wmark_regularizer = WatermarkRegularizer(scale, b, wtype=wtype, randseed=randseed)
    init_shape = (3, 32, 32)
    # model = ResNet(init_shape, nb_classes)
    # model = ResNet(init_shape, image_channels=3, num_classes=10, layers=3)
    model = ResNet(image_channels=3, num_classes=10, layers=[2, 3, 4]).to(device)

    print(model)
    print('watermark matrix:\n{}'.format(wmark_regularizer.get_matrix()))

    # training process; base lr is 1.0 because LambdaLR multiplies it by schedule(epoch)
    optimizer = optim.SGD(model.parameters(), lr=1.0, momentum=0.9, nesterov=True)
    criterion = nn.CrossEntropyLoss()
    scheduler = optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=schedule)

    if len(base_modelw_fname) > 0:
        model.load_state_dict(torch.load(base_modelw_fname, map_location=device))
    print("finished compiling")

    # train network
    history = {'loss': []}
    for epoch in range(nb_epoch):
        losses = []

        for batch_idx, (data, targets) in enumerate(train_loader):
            # get data to cuda if possible
            data = data.to(device=device)
            targets = targets.to(device=device)

            # forward
            scores = model(data)
            loss = criterion(scores, targets)

            losses.append(loss.item())

            # backward
            optimizer.zero_grad()
            loss.backward()

            # gradient descent or adam step
            optimizer.step()

        scheduler.step()  # advance the learning-rate schedule once per epoch
        history['loss'].append(sum(losses) / len(losses))
        print(f'Cost at epoch {epoch} is {sum(losses) / len(losses)}')

    # check accuracy
    def check_accuracy(loader, model):
        if loader.dataset.train:
            print("checking accuracy on training data")
        else:
            print("checking accuracy on test data")

        num_correct = 0
        num_samples = 0
        model.eval()

        with torch.no_grad():
            for x, y in loader:
                x = x.to(device=device)
                y = y.to(device=device)

                scores = model(x)
                _, predictions = scores.max(1)
                num_correct += (predictions == y).sum()
                num_samples += predictions.size(0)

        print(f'got {num_correct}/{num_samples} with accuracy {float(num_correct) / float(num_samples) * 100:.2f}')
        model.train()

    check_accuracy(train_loader, model)
    check_accuracy(test_loader, model)

    ####
    torch.save(model.state_dict(), modelname_prefix + '.pt')
    with open(ohist_fname, 'ab') as f:
        pickle.dump(history, f)

    if target_blk_id > 0:
        save_wmark_signatures(modelname_prefix, model)

I don’t see any obvious issues in your code, but I also don’t know how e.g. WatermarkRegularizer works and how it’s used.
Do you see the error during the training of the standalone model? If so, how is ResNet defined?

Hi Chao!

Here is a post that discusses how to diagnose and fix inplace-modification
errors:

Please feel free to post follow-up questions if you’re still having problems
with this issue.
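
In short: an in-place operation (one that mutates a tensor, such as += or relu_()) has modified a tensor that autograd saved during the forward pass, which bumps that tensor's version counter. Here is a minimal sketch (not your code) that raises the same error, because relu saves its output for the backward pass:

import torch

a = torch.ones(3, requires_grad=True)
b = torch.relu(a)   # autograd saves b to compute relu's backward
b += 1              # in-place op bumps b's version from 0 to 1
b.sum().backward()  # RuntimeError: ... output 0 of ReluBackward0, is at version 1

The usual fix is the out-of-place form of the same operation (here, b = b + 1), and torch.autograd.set_detect_anomaly(True) makes the error point at the forward-pass line that created the offending tensor.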

Good luck!

K. Frank

hi @ptrblck, thank you for responding.
this is my resnet.py

import torch
import torch.nn as nn

# set device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

class block(nn.Module):
    def __init__(self, in_channels, num_filters, stride=1):
        super(block, self).__init__()

        self.conv1 = nn.Conv2d(in_channels, num_filters, kernel_size=1, stride=1, padding=0)
        self.bn1 = nn.BatchNorm2d(num_filters)
        self.conv2 = nn.Conv2d(num_filters, num_filters, kernel_size=3, stride=stride, padding=1)
        self.bn2 = nn.BatchNorm2d(num_filters)
        self.relu = nn.ReLU()

        # 1x1 convolution so the identity branch matches the main branch's channels (and stride)
        self.conv_identity = nn.Conv2d(in_channels, num_filters, kernel_size=1, stride=stride, padding=0)

    def forward(self, x):
        identity = x

        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.conv2(x)
        x = self.bn2(x)
        x = self.relu(x)

        # pass the identity tensor through the 1x1 convolution to match the number of channels
        identity = self.conv_identity(identity)

        # note: += mutates the ReLU output in place, even though autograd saved it for the backward pass
        x += identity
        x = self.relu(x)
        return x

class ResNet(nn.Module):
    def __init__(self, image_channels, num_classes, layers):
        super(ResNet, self).__init__()
        self.in_channels = 16
        self.conv1 = nn.Conv2d(image_channels, 16, kernel_size=3, stride=2, padding=1)
        self.bn1 = nn.BatchNorm2d(16)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # resnet layers; layers[i] is the number of blocks in stage i
        self.layer1 = self._make_layer(block, 16, layers[0])
        self.layer2 = self._make_layer(block, 32, layers[1])
        self.layer3 = self._make_layer(block, 64, layers[2])
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(64, num_classes)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x

    def _make_layer(self, block, num_filters, num_blocks, stride=1):
        # the first block may downsample; the remaining num_blocks - 1 blocks keep stride 1
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_channels, num_filters, stride))
            self.in_channels = num_filters
        return nn.Sequential(*layers)

def test():
    net = ResNet(image_channels=3, num_classes=100, layers=[2, 3, 4]).to(device)
    x = torch.randn(3, 3, 32, 32, device=device)
    y = net(x)
    print(y.shape)

test()
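
(with layers=[2, 3, 4] and num_classes=100 this prints torch.Size([3, 100]) — a batch of 3 samples with 100 class scores each)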

and this is my watermark_regularizer.py

import torch
import numpy as np
import torch.nn as nn
import torch.nn.functional as f

def random_index_generator(count):
    indices = np.arange(0, count)
    np.random.shuffle(indices)

    for idx in indices:
        yield idx

class WatermarkRegularizer(nn.Module):
    def __init__(self, k, b, wtype='random', randseed='none'):
        super(WatermarkRegularizer, self).__init__()
        self.k = torch.tensor(k, dtype=torch.float)
        self.uses_learning_phase = True
        self.wtype = wtype
        self.w = None
        self.p = None
        self.b = torch.tensor(b)

        if randseed == 'time':
            import time
            np.random.seed(int(time.time()))

    def set_param(self, p):
        if self.p is not None:
            raise Exception('Regularizer cannot be reused. '
                            'Instantiate one regularizer per layer.')

        self.p = p

        # make the watermark projection matrix
        p_shape = p.shape
        w_rows = np.prod(p_shape[0:3])
        w_cols = self.b.shape[1]

        if self.wtype == 'random':
            self.w = np.random.randn(w_rows, w_cols)
        elif self.wtype == 'direct':
            self.w = np.zeros((w_rows, w_cols), dtype=None)
            rand_idx_gen = random_index_generator(w_rows)

            for col in range(w_cols):
                self.w[next(rand_idx_gen)][col] = 1.
        elif self.wtype == 'diff':
            self.w = np.zeros((w_rows, w_cols), dtype=None)
            rand_idx_gen = random_index_generator(w_rows)

            for col in range(w_cols):
                self.w[next(rand_idx_gen)][col] = 1.
                self.w[next(rand_idx_gen)][col] = -1.
        else:
            raise Exception('wtype="{}" is not supported'.format(self.wtype))

    def forward(self, loss):
        if self.p is None:
            raise Exception('Need to call `set_param` on '
                            'WatermarkRegularizer instance '
                            'before calling the instance.')

        # mean over the kernel's last dimension, flattened into a row vector
        x = torch.mean(self.p, dim=3)
        y = torch.reshape(x, (1, torch.numel(x)))
        z = torch.tensor(self.w, dtype=torch.float)
        # out-of-place add, so the incoming loss tensor is not modified in place
        regularized_loss = loss + self.k * torch.sum(
            f.binary_cross_entropy_with_logits(torch.matmul(y, z), self.b.float()))
        return regularized_loss

    def set_layer(self, layer):
        print('called WatermarkRegularizer.set_layer()')
        # nn.Module has no set_layer(); just remember the layer for get_encoded_code()
        self.layer = layer

    def get_matrix(self):
        return self.w

    def get_signature(self):
        return self.b

    def get_encoded_code(self):
        # this function will not work if set_layer was not called
        # note: get_weights() is Keras-style; a PyTorch conv layer would use layer.weight instead
        layer = self.layer
        weights = layer.get_weights()
        weight = (np.array(weights[0])).mean(axis=3)
        print(torch.sigmoid(torch.matmul(torch.tensor(weight.reshape(1, weight.size), dtype=torch.float),
                                         torch.tensor(self.w, dtype=torch.float))))
        return None

def get_wmark_regularizer(model):
    ret = []

    # note: assumes Keras-style `layers` / `regularizers` attributes; a plain nn.Module does not provide these
    for i, layer in enumerate(model.layers):
        for regularizer in layer.regularizers:
            if str(regularizer.__class__).find('WatermarkRegularizer') >= 0:
                ret.append((i, regularizer))
    return ret

def show_encoded_wmark(model):
    # note: like get_wmark_regularizer(), this assumes Keras-style layer/regularizer attributes
    for i, layer in enumerate(model.layers):
        for regularizer in layer.regularizers:
            if str(regularizer.__class__).find('WatermarkRegularizer') >= 0:
                print('layer {}: {}'.format(i, layer.__class__))
                weights = layer.get_weights()
                weight = (np.array(weights[0])).mean(axis=3)
                code = torch.sigmoid(torch.matmul(torch.tensor(weight.reshape(1, weight.size), dtype=torch.float),
                                                  torch.tensor(regularizer.w, dtype=torch.float)))
                print(code)
                print(code > 0.5)
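
For completeness, here is roughly how I understand the regularizer is meant to be wired into the training loop (a rough sketch — the target layer, scale, and embed dimension are just example choices):

import torch
import torch.nn as nn
import numpy as np
from watermark_regularizer import WatermarkRegularizer
from resnet import ResNet

model = ResNet(image_channels=3, num_classes=10, layers=[2, 3, 4])
reg = WatermarkRegularizer(0.01, np.ones((1, 256)), wtype='random')
reg.set_param(model.layer3[0].conv2.weight)  # hypothetical target layer

criterion = nn.CrossEntropyLoss()
data = torch.randn(8, 3, 32, 32)
targets = torch.randint(0, 10, (8,))

loss = criterion(model(data), targets)
loss = reg(loss)  # adds k * BCE(mean-pooled conv weights @ W, signature b)
loss.backward()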