I am facing the same issue and have been stuck on it for a day.
Here is my code:
import torch
import torch.nn as nn

class RNN(nn.Module):
    def __init__(self, input_size, output_size, hidden_size=64):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.xh = nn.Linear(self.input_size, self.hidden_size, bias=False)  # input-to-hidden
        self.hh = nn.Linear(self.hidden_size, self.hidden_size)             # hidden-to-hidden
        self.hy = nn.Linear(self.hidden_size, self.output_size)             # hidden-to-output
        # hidden state is stored on the module and reused across forward calls
        self.h = torch.zeros(self.hidden_size, requires_grad=True)
        self.tanh = nn.Tanh()
        self.softmax = nn.Softmax(dim=1)  # currently unused
        self.sigmoid = nn.Sigmoid()

    def rnn_cell(self, x_t):
        first_h = self.hh(self.h)
        second_x = self.xh(x_t)
        act = second_x + first_h
        self.h = self.tanh(act)          # hidden state is overwritten here
        updated_c = self.sigmoid(self.hy(self.h))
        return updated_c

    def forward(self, inp):
        return self.rnn_cell(inp)
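For reference, a single forward pass works fine. Here is a quick dummy call (the sizes are placeholders; my real batches come out of the DataLoader as 2-D float tensors of shape (batch_size, n_features)):

model = RNN(input_size=10, output_size=1, hidden_size=64)  # 10 features is just a placeholder
x = torch.randn(32, 10)        # dummy batch: (batch_size, n_features)
out = model(x)
print(out.shape)               # torch.Size([32, 1]), one sigmoid output per sample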
Here is the training code:
import numpy as np
from torch.autograd import Variable

def train(train_x, valid_x, lr, epochs, hidden_units, net='RNN'):
    # grab one batch to infer the number of input features
    for step, (data, label) in enumerate(train_x):
        inputs = np.array(data)
        break
    if net == 'RNN':
        model = RNN(inputs.shape[1], 1, hidden_units)
    elif net == 'LSTM':
        h = torch.zeros(hidden_units).requires_grad_()
        c = torch.zeros(hidden_units).requires_grad_()
        model = LSTM(inputs.shape[1], 1, hidden_units)
    elif net == 'GRU':
        St_1 = torch.zeros(hidden_units).requires_grad_()
        model = GRUModel(inputs.shape[1], 1, hidden_units)
    model.to(device)
    train_loss, val_loss = [], []
    train_accuracy, val_accuracy = [], []
    optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9)
    criterion = nn.BCELoss()
    for ep in range(epochs):
        running_loss, correct = 0, 0
        for i, (data, label) in enumerate(train_x):
            data, label = Variable(data), Variable(label)
            data, label = data.to(device), label.to(device)
            optimizer.zero_grad()
            if net == 'RNN':
                net_out = model(data)
            elif net == 'LSTM':
                net_out, h, c = model(data, h, c)
            elif net == 'GRU':
                net_out, St_1 = model(data, St_1)
            label = torch.reshape(label, (label.shape[0], 1))
            net_out = torch.reshape(net_out, (label.shape[0], 1))
            label = label.float()
            loss = criterion(net_out, label)
            loss.backward(retain_graph=True, inputs=list(model.parameters()))
            optimizer.step()
            running_loss += loss.item()
            # pred = torch.argmax(net_out, axis=1)  # get the index of the max log-probability
            # actual = torch.argmax(label, axis=1)
            out = (net_out > 0.5).float()
            correct += out.eq(label).sum()
        print(running_loss)
        print("Epoch:", ep)
        print(correct.item())
        print("Training Accuracy:", 100. * correct.item() / len(train_x.dataset))
        print("Train Loss:", running_loss / len(train_x.dataset))
        train_loss.append(running_loss / len(train_x.dataset))
        train_accuracy.append(correct / len(train_x.dataset))
        # test_loss = 0
        # correct = 0
        # with torch.no_grad():
        #     for batch_idx, (data, target) in enumerate(valid_x):
        #         data, target = Variable(data), Variable(target)
        #         data, target = data.to(device), target.to(device)
        #         # data = data.view(-1, 784)
        #         if net == 'RNN':
        #             net_out, _ = model(data, h)
        #         elif net == 'LSTM':
        #             net_out, _, _ = model(data, h, c)
        #         elif net == 'GRU':
        #             net_out, _ = model(data, St_1)
        #         net_out = torch.reshape(net_out, (net_out.shape[0],))
        #         # sum up batch loss
        #         target = target.float()
        #         test_loss += criterion(net_out, target).item()
        #         # pred = torch.argmax(net_out, axis=1)  # get the index of the max log-probability
        #         # actual = torch.argmax(label, axis=1)
        #         out = (net_out > 0.5).float()
        #         correct += out.eq(target).sum()
        # val_loss.append(test_loss / len(valid_x.dataset))
        # val_accuracy.append(correct / len(valid_x.dataset))
        # print("Validation Accuracy:", 100. * correct.item() / len(valid_x.dataset))
        # print("Validation Loss:", test_loss / len(valid_x.dataset))
        # print("----------------------------------------------------------")
    return model, train_loss, train_accuracy, val_loss, val_accuracy
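In case it helps, here is a minimal standalone reproduction using the RNN class above with random data (the sizes are again just placeholders). The first batch trains fine; the error below is raised on the second call to loss.backward():

torch.manual_seed(0)
model = RNN(input_size=10, output_size=1, hidden_size=64)   # dummy sizes
optimizer = torch.optim.SGD(model.parameters(), lr=0.0001, momentum=0.9)
criterion = nn.BCELoss()

for step in range(2):
    x = torch.randn(32, 10)                     # dummy batch
    y = torch.randint(0, 2, (32, 1)).float()    # dummy binary labels
    optimizer.zero_grad()
    out = model(x)
    loss = criterion(out, y)
    # step 0 succeeds; on step 1 this line raises the RuntimeError shown below
    loss.backward(retain_graph=True, inputs=list(model.parameters()))
    optimizer.step()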
Here is the full error output (with anomaly detection enabled):
/Users/arslan/anaconda3/envs/torch_env/lib/python3.8/site-packages/torch/autograd/__init__.py:147: UserWarning: Error detected in MmBackward. Traceback of forward call that caused the error:
File "/Users/arslan/anaconda3/envs/torch_env/lib/python3.8/runpy.py", line 194, in _run_module_as_main
return _run_code(code, main_globals, None,
File "/Users/arslan/anaconda3/envs/torch_env/lib/python3.8/runpy.py", line 87, in _run_code
exec(code, run_globals)
File "/Users/arslan/anaconda3/envs/torch_env/lib/python3.8/site-packages/ipykernel_launcher.py", line 16, in <module>
app.launch_new_instance()
File "/Users/arslan/anaconda3/envs/torch_env/lib/python3.8/site-packages/traitlets/config/application.py", line 845, in launch_instance
app.start()
File "/Users/arslan/anaconda3/envs/torch_env/lib/python3.8/site-packages/ipykernel/kernelapp.py", line 619, in start
self.io_loop.start()
File "/Users/arslan/anaconda3/envs/torch_env/lib/python3.8/site-packages/tornado/platform/asyncio.py", line 199, in start
self.asyncio_loop.run_forever()
File "/Users/arslan/anaconda3/envs/torch_env/lib/python3.8/asyncio/base_events.py", line 570, in run_forever
self._run_once()
File "/Users/arslan/anaconda3/envs/torch_env/lib/python3.8/asyncio/base_events.py", line 1859, in _run_once
handle._run()
File "/Users/arslan/anaconda3/envs/torch_env/lib/python3.8/asyncio/events.py", line 81, in _run
self._context.run(self._callback, *self._args)
File "/Users/arslan/anaconda3/envs/torch_env/lib/python3.8/site-packages/tornado/ioloop.py", line 688, in <lambda>
lambda f: self._run_callback(functools.partial(callback, future))
File "/Users/arslan/anaconda3/envs/torch_env/lib/python3.8/site-packages/tornado/ioloop.py", line 741, in _run_callback
ret = callback()
File "/Users/arslan/anaconda3/envs/torch_env/lib/python3.8/site-packages/tornado/gen.py", line 814, in inner
self.ctx_run(self.run)
File "/Users/arslan/anaconda3/envs/torch_env/lib/python3.8/site-packages/tornado/gen.py", line 775, in run
yielded = self.gen.send(value)
File "/Users/arslan/anaconda3/envs/torch_env/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 358, in process_one
yield gen.maybe_future(dispatch(*args))
File "/Users/arslan/anaconda3/envs/torch_env/lib/python3.8/site-packages/tornado/gen.py", line 234, in wrapper
yielded = ctx_run(next, result)
File "/Users/arslan/anaconda3/envs/torch_env/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 261, in dispatch_shell
yield gen.maybe_future(handler(stream, idents, msg))
File "/Users/arslan/anaconda3/envs/torch_env/lib/python3.8/site-packages/tornado/gen.py", line 234, in wrapper
yielded = ctx_run(next, result)
File "/Users/arslan/anaconda3/envs/torch_env/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 536, in execute_request
self.do_execute(
File "/Users/arslan/anaconda3/envs/torch_env/lib/python3.8/site-packages/tornado/gen.py", line 234, in wrapper
yielded = ctx_run(next, result)
File "/Users/arslan/anaconda3/envs/torch_env/lib/python3.8/site-packages/ipykernel/ipkernel.py", line 302, in do_execute
res = shell.run_cell(code, store_history=store_history, silent=silent)
File "/Users/arslan/anaconda3/envs/torch_env/lib/python3.8/site-packages/ipykernel/zmqshell.py", line 539, in run_cell
return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
File "/Users/arslan/anaconda3/envs/torch_env/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 2898, in run_cell
result = self._run_cell(
File "/Users/arslan/anaconda3/envs/torch_env/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 2944, in _run_cell
return runner(coro)
File "/Users/arslan/anaconda3/envs/torch_env/lib/python3.8/site-packages/IPython/core/async_helpers.py", line 68, in _pseudo_sync_runner
coro.send(None)
File "/Users/arslan/anaconda3/envs/torch_env/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3169, in run_cell_async
has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
File "/Users/arslan/anaconda3/envs/torch_env/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3361, in run_ast_nodes
if (await self.run_code(code, result, async_=asy)):
File "/Users/arslan/anaconda3/envs/torch_env/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3441, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-61-f6fbdf7371e3>", line 2, in <module>
net, train_loss, train_accuracy, val_loss, val_accuracy = train(train_loader, valid_loader, lr=0.0001, epochs=10, hidden_units=64, net='RNN')
File "<ipython-input-60-3bb8b3924f63>", line 35, in train
net_out= model(data)
File "/Users/arslan/anaconda3/envs/torch_env/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl
return forward_call(*input, **kwargs)
File "<ipython-input-59-cdbb099f3af3>", line 39, in forward
return self.rnn_cell(inp)
File "<ipython-input-59-cdbb099f3af3>", line 25, in rnn_cell
first_h = self.hh(self.h)
File "/Users/arslan/anaconda3/envs/torch_env/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl
return forward_call(*input, **kwargs)
File "/Users/arslan/anaconda3/envs/torch_env/lib/python3.8/site-packages/torch/nn/modules/linear.py", line 96, in forward
return F.linear(input, self.weight, self.bias)
File "/Users/arslan/anaconda3/envs/torch_env/lib/python3.8/site-packages/torch/nn/functional.py", line 1847, in linear
return torch._C._nn.linear(input, weight, bias)
(Triggered internally at /Users/distiller/project/conda/conda-bld/pytorch_1623459044803/work/torch/csrc/autograd/python_anomaly_mode.cpp:104.)
Variable._execution_engine.run_backward(
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-61-f6fbdf7371e3> in <module>
1 torch.autograd.set_detect_anomaly(True)
----> 2 net, train_loss, train_accuracy, val_loss, val_accuracy = train(train_loader, valid_loader, lr=0.0001, epochs=10, hidden_units=64, net='RNN')
<ipython-input-60-3bb8b3924f63> in train(train_x, valid_x, lr, epochs, hidden_units, net)
44 label = label.float()
45 loss = criterion(net_out, label)
---> 46 loss.backward(retain_graph=True, inputs=list(model.parameters()))
47 optimizer.step()
48
~/anaconda3/envs/torch_env/lib/python3.8/site-packages/torch/_tensor.py in backward(self, gradient, retain_graph, create_graph, inputs)
253 create_graph=create_graph,
254 inputs=inputs)
--> 255 torch.autograd.backward(self, gradient, retain_graph, create_graph, inputs=inputs)
256
257 def register_hook(self, hook):
~/anaconda3/envs/torch_env/lib/python3.8/site-packages/torch/autograd/__init__.py in backward(tensors, grad_tensors, retain_graph, create_graph, grad_variables, inputs)
145 retain_graph = create_graph
146
--> 147 Variable._execution_engine.run_backward(
148 tensors, grad_tensors_, retain_graph, create_graph, inputs,
149 allow_unreachable=True, accumulate_grad=True) # allow_unreachable flag
RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.FloatTensor [64, 64]], which is output 0 of TBackward, is at version 2; expected version 1 instead. Hint: the backtrace further above shows the operation that failed to compute its gradient. The variable in question was changed in there or anywhere later. Good luck!
Could you please help me, @ptrblck @albanD? I would be very thankful to both of you.