Hi, Im a begineer in pytorch. As we all know, we should not use view() or reshape() to swap dimensions of tensors
Do not use view() or reshape() to swap dimensions of tensors
because it can make wrong in computing gradient.but how can I flatten a matrix to a vector. I have already tried torch.flatten
but it still said it’s in-place operation.So how can I solve this problem or something else wrong?
here is my code:
this is the cnn articture:
class ResNet18(nn.Module):
def __init__(self,in_chanel):
super().__init__()
#layer0
self.conv=nn.Conv2d(in_chanel,64,7,stride=2,padding=3)
self.pool=nn.MaxPool2d(3,stride=2,padding=1)
self.conv1_1=Block(64,64,1)
self.conv1_2=Block(64,64,1)
self.conv2_1=Block(64,128,2,downsample=True)
self.conv2_2=Block(128,128,1)
self.conv3_1=Block(128,256,2,downsample=True)
self.conv3_2=Block(256,256,1)
self.conv4_1=Block(256,512,2,downsample=True)
self.conv4_2=Block(512,512,1)
self.relu=nn.ReLU()
self.AvgPool=nn.AvgPool2d(7)
self.BatchNorm=nn.BatchNorm2d(64,eps=1e-05,momentum=0.1)
self.Linear=nn.Linear(512,1000)
#self.flatten=nn.Flatten()
def forward(self,x):
x=self.conv(x)
x=self.relu(x)
x=self.BatchNorm(x)
x=self.pool(x)
x=self.conv1_1(x)#1,64,25,25
x=self.conv1_2(x)#1,64,25,25
x=self.conv2_1(x)#1,128,13,13
x=self.conv2_2(x)#1,128,13,13
x=self.conv3_1(x)
x=self.conv3_2(x)#1,256,7,7
x=self.conv4_1(x)
x=self.conv4_2(x)
x=self.AvgPool(x)
x=torch.flatten(x,1)
x=self.Linear(x)
return x
import torch.optim as optim
res18_query=ResNet18(1)
res18_key=ResNet18(1)
img_tensor=torch.randn(10,3,512,512)
queue=torch.randn(10,1000)
x_query=random_transform(img_tensor)#1, 1, 224, 224
x_key=random_transform(img_tensor)
q=res18_query(x_query)#1,512
k=res18_key(x_key)
k=k.detach()
#Returns a new Tensor, detached from the current graph.
#The result will never require gradient.
M,C=q.shape
K,_=queue.shape
l_pos=torch.bmm(q.view(M,1,C),k.view(M,C,1))
l_neg=torch.mm(q.view(M,C),queue.view(C,K))
l_pos=l_pos.view(M,1)
logits=torch.cat([l_pos,l_neg],dim=1) #m,k+1
and the flowing is optimizer :
optimizer=optim.SGD(res18_query.parameters(),lr=0.0001,momentum=0.9)
criterion=nn.CrossEntropyLoss()
for i in range(10):
t=0.07
labels=torch.zeros(10)
train=(logits/t).type(torch.LongTensor)
labels=labels.type(torch.LongTensor)
loss=criterion(logits/t,labels)
loss.backward(retain_graph=True)
optimizer.step()
print(loss)
tensor(60.1358, grad_fn=<NllLossBackward0>)
------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-39-ababa8ceb59d> in <module>
7 labels=labels.type(torch.LongTensor)
8 loss=criterion(logits/t,labels)
----> 9 loss.backward(retain_graph=True)
10 optimizer.step()
11 print(loss)
c:\users\lenovo\appdata\local\programs\python\python37\lib\site-packages\torch\_tensor.py in backward(self, gradient, retain_graph, create_graph, inputs)
394 create_graph=create_graph,
395 inputs=inputs)
--> 396 torch.autograd.backward(self, gradient, retain_graph, create_graph, inputs=inputs)
397
398 def register_hook(self, hook):
c:\users\lenovo\appdata\local\programs\python\python37\lib\site-packages\torch\autograd\__init__.py in backward(tensors, grad_tensors, retain_graph, create_graph, grad_variables, inputs)
173 Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
174 tensors, grad_tensors_, retain_graph, create_graph, inputs,
--> 175 allow_unreachable=True, accumulate_grad=True) # Calls into the C++ engine to run the backward pass
176
177 def grad(
RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.FloatTensor [512, 1000]], which is output 0 of AsStridedBackward0, is at version 2; expected version 1 instead. Hint: enable anomaly detection to find the operation that failed to compute its gradient, with torch.autograd.set_detect_anomaly(True).
I am very grateful and thankful if you can give me a advise