GRU error: 'tuple' object has no attribute 'data'

Hi everyone, I am a new PyTorch user. Currently I'm trying to implement an LSTM and a GRU model. The LSTM training works fine, but when I switch to GRU training I'm running into some issues.

Any advice?


AttributeError                            Traceback (most recent call last)
in
----> 1 train(modelGRU, train_list, val_list, path, seq, epoch, batch_size, criterionGRU, optimizerGRU, model_type="GRU")

in train(model, train_list, val_list, path, seq, epoch, batch_size, criterion, optimizer, model_type)
     52             # print(x.size(), hidden[0].size())
     53             if model_type == "GRU":
---> 54                 hidden = hidden.data
     55             else:
     56                 hidden = tuple([e.data for e in hidden])

AttributeError: 'tuple' object has no attribute 'data'

class GRUNet(nn.Module):
    def __init__(self,in_size,h_size,n_layers,fc_out,out_size,dropout=0.5):
        super(GRUNet, self).__init__()   
        self.gru = nn.GRU(input_size=in_size,hidden_size=h_size,num_layers=n_layers,dropout=dropout,bias=False)
        self.fc = nn.Linear(in_features=h_size,out_features=fc_out,bias=False)
        self.relu = nn.ReLU(inplace=True)
        self.out = nn.Linear(in_features=fc_out,out_features=out_size,bias=False)
        self.tanh = nn.Tanh()        
    def forward(self, x, hidden):
        out, hidden = self.gru(x, hidden)
        x = self.fc(x)
        x = self.relu(x)
        x = self.out(x)
        x = self.tanh(x)
        return x, hidden

class MyLstm(nn.Module):
    def __init__(self,in_size,h_size,n_layers,fc_out,out_size,dropout=0.5):
        super(MyLstm, self).__init__()
        self.lstm = nn.LSTM(input_size=in_size,hidden_size=h_size,num_layers=n_layers,dropout=dropout,bias=False)
        self.fc = nn.Linear(in_features=h_size,out_features=fc_out,bias=False)
        self.relu = nn.ReLU(inplace=True)
        self.out = nn.Linear(in_features=fc_out,out_features=out_size,bias=False)
        self.tanh = nn.Tanh()
    def forward(self,x,hidden):
        x, hidden = self.lstm(x,hidden)
#         x = x[-1:]
        x = self.fc(x)
        x = self.relu(x)
        x = self.out(x)
        x = self.tanh(x)
        return x, hidden

def train(model,train_list,val_list,path,seq,epoch,batch_size,criterion,optimizer,model_type):
    for e in range(epoch):
        train_data = load_data(train_list,batch_size)
        a_loss = 0
        a_size = 0
        model.train()
        for x,y in train_data:
            x,y = x.to(device),y.to(device)
            bs = x.size()[1]
            h1 = torch.zeros((n_layers,bs,h_size)).to("cuda:0")
            h2 = torch.zeros((n_layers,bs,h_size)).to("cuda:0")
            hidden = (h1,h2)
#             hidden = (hidden[0].detach(),hidden[1].detach())
#             print(x.size(),hidden[0].size())
            if model_type == "GRU":
                hidden = hidden.data
            else:
                hidden = tuple([e.data for e in hidden])
            model.zero_grad()
            pred,hidden = model(x,hidden)
            loss = criterion(pred,y)
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(),5)
            optimizer.step()
            a_loss += loss.detach()
            a_size += bs
#         print(e,a_loss/a_size*1e+6)
        model.eval()
        with torch.no_grad():
            val_data = load_data(val_list,batch_size)
            b_loss = 0
            b_size = 0
            for x,y in val_data:
                x,y = x.to(device),y.to(device)
                bs = x.size()[1]
                h1 = torch.zeros((n_layers,bs,h_size)).to("cuda:0")
                h2 = torch.zeros((n_layers,bs,h_size)).to("cuda:0")
                hidden = (h1,h2)
                pred,hidden = model(x,hidden)
                loss = criterion(pred,y)
                b_loss += loss.detach()
                b_size += bs
        print("epoch: {} - train_loss: {} - val_loss: {}".format(e+1,float(a_loss.item()/a_size*1e+6),b_loss.item()/b_size*1e+6))

train(modelGRU,train_list,val_list,path,seq,epoch,batch_size,criterionGRU,optimizerGRU,model_type="GRU")

Thank you

It is because in this line:

            hidden = (h1,h2)

you combine the two hidden states into a tuple, and a Python tuple has no .data attribute. A GRU only needs a single hidden state tensor, not two, so you can pass just h1 (or h2) on its own. You only need the tuple for LSTMs, because an LSTM also requires the cell state.
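
To see the difference concretely, here is a minimal sketch (not the poster's code; the sizes are illustrative) of the initial hidden state that nn.GRU and nn.LSTM each expect:

```python
import torch
import torch.nn as nn

# Illustrative sizes only
n_layers, batch, in_size, h_size, seq_len = 3, 16, 3, 50, 100

x = torch.randn(seq_len, batch, in_size)

# GRU: a single hidden-state tensor of shape (n_layers, batch, h_size)
gru = nn.GRU(in_size, h_size, num_layers=n_layers)
h0 = torch.zeros(n_layers, batch, h_size)
out, hn = gru(x, h0)                 # hn is a plain tensor, so hn.data works

# LSTM: a (hidden state, cell state) tuple
lstm = nn.LSTM(in_size, h_size, num_layers=n_layers)
h0 = torch.zeros(n_layers, batch, h_size)
c0 = torch.zeros(n_layers, batch, h_size)
out, (hn, cn) = lstm(x, (h0, c0))    # the tuple itself has no .data attribute
```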

Thank you for your answer. I changed the code as you suggested, but now I've got a new error :frowning:
This is the code.
I would appreciate it if you could help me one more time.
Thank you

path = "new_z_axis"
device = "cuda:0"
in_size = 3
h_size = 50
n_layers = 3
fc = 20
out = 1
batch_size = 16
seq = 100
epoch = 100
learning_rate = 1e-3
ratio = 0.8
checkpoint = os.path.join("checkpoints","model_"+path+"_"+str(in_size)+".pth")

def train(model,train_list,val_list,path,seq,epoch,batch_size,criterion,optimizer,model_type):
    for e in range(epoch):
        train_data = load_data(train_list,batch_size)
        a_loss = 0
        a_size = 0
        model.train()
        for x,y in train_data:
            x,y = x.to(device),y.to(device)
            bs = x.size()[1]
            
#             hidden = (hidden[0].detach(),hidden[1].detach())
#             print(x.size(),hidden[0].size())
            if model_type == "GRU":
                h1 = torch.zeros((n_layers,bs,h_size)).to("cuda:0")
                hidden = h1
                hidden = hidden.data
            else:
                h1 = torch.zeros((n_layers,bs,h_size)).to("cuda:0")
                h2 = torch.zeros((n_layers,bs,h_size)).to("cuda:0")
                hidden = (h1,h2)
                hidden = tuple([e.data for e in hidden])
                
            model.zero_grad()
            print (len(hidden))
            pred,hidden = model(x,hidden)
            loss = criterion(pred,y)
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(),5)
            optimizer.step()
            a_loss += loss.detach()
            a_size += bs
#         print(e,a_loss/a_size*1e+6)
        model.eval()
        with torch.no_grad():
            val_data = load_data(val_list,batch_size)
            b_loss = 0
            b_size = 0
            for x,y in val_data:
                x,y = x.to(device),y.to(device)
                bs = x.size()[1]
                if model_type == "GRU":
                    h1 = torch.zeros((n_layers,bs,h_size)).to("cuda:0")
                    hidden = h1
                    hidden = hidden.data
                else:
                    h1 = torch.zeros((n_layers,bs,h_size)).to("cuda:0")
                    h2 = torch.zeros((n_layers,bs,h_size)).to("cuda:0")
                    hidden = (h1,h2)
                    hidden = tuple([e.data for e in hidden])
                pred,hidden = model(x,hidden)
                loss = criterion(pred,y)
                b_loss += loss.detach()
                b_size += bs
        print("epoch: {} - train_loss: {} - val_loss: {}".format(e+1,float(a_loss.item()/a_size*1e+6),b_loss.item()/b_size*1e+6))```


RuntimeError                              Traceback (most recent call last)
in
----> 1 train(modelGRU, train_list, val_list, path, seq, epoch, batch_size, criterionGRU, optimizerGRU, model_type="GRU")

in train(model, train_list, val_list, path, seq, epoch, batch_size, criterion, optimizer, model_type)
     61             model.zero_grad()
     62             print(len(hidden))
---> 63             pred, hidden = model(x, hidden)
     64             loss = criterion(pred, y)
     65             loss.backward()

~\Anaconda3\lib\site-packages\torch\nn\modules\module.py in __call__(self, *input, **kwargs)
    539             result = self._slow_forward(*input, **kwargs)
    540         else:
--> 541             result = self.forward(*input, **kwargs)
    542         for hook in self._forward_hooks.values():
    543             hook_result = hook(self, input, result)

in forward(self, x, hidden)
      9     def forward(self, x, hidden):
     10         out, hidden = self.gru(x, hidden)
---> 11         x = self.fc(x)
     12         x = self.relu(x)
     13         x = self.out(x)

~\Anaconda3\lib\site-packages\torch\nn\modules\module.py in __call__(self, *input, **kwargs)
    539             result = self._slow_forward(*input, **kwargs)
    540         else:
--> 541             result = self.forward(*input, **kwargs)
    542         for hook in self._forward_hooks.values():
    543             hook_result = hook(self, input, result)

~\Anaconda3\lib\site-packages\torch\nn\modules\linear.py in forward(self, input)
     85 
     86     def forward(self, input):
---> 87         return F.linear(input, self.weight, self.bias)
     88 
     89     def extra_repr(self):

~\Anaconda3\lib\site-packages\torch\nn\functional.py in linear(input, weight, bias)
   1370         ret = torch.addmm(bias, input, weight.t())
   1371     else:
-> 1372         output = input.matmul(weight.t())
   1373     if bias is not None:
   1374         output += bias

RuntimeError: size mismatch, m1: [1600 x 3], m2: [50 x 20] at C:/w/1/s/tmp_conda_3.7_104508/conda/conda-bld/pytorch_1572950778684/work/aten/src\THC/generic/THCTensorMathBlas.cu:290

It is because in your linear layer you pass in x, which is the raw input shaped for the RNN, not the RNN's output. You need to pass in the output of your RNN instead of the input data. You may also have to change the shape of the RNN output so it is accepted by the linear layer, which takes a two-dimensional tensor, while the RNN output has three dimensions because of the sequence length. So like this:

out = out.view(out.shape[1], out.shape[0] * out.shape[2])
x = self.fc(out)

This may also produce an error depending on the size of your first linear layer. Just try it though and we can figure out that error pretty easily.
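
To make the size mismatch concrete, here is a minimal sketch (the sizes are the hyperparameters from the post; the variable names are illustrative, not the poster's code):

```python
import torch
import torch.nn as nn

seq_len, batch, in_size, h_size, fc_out = 100, 16, 3, 50, 20

gru = nn.GRU(in_size, h_size, num_layers=3)
fc = nn.Linear(h_size, fc_out)

x = torch.randn(seq_len, batch, in_size)   # (100, 16, 3)
out, hidden = gru(x)                       # out: (100, 16, 50)

# fc(x) fails: x's last dimension is 3 but fc expects 50, i.e. 100*16 = 1600 rows
# of size 3 against a 50x20 weight -> "size mismatch, m1: [1600 x 3], m2: [50 x 20]"
y = fc(out)                                # works: (100, 16, 20)
```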


Thank you so much. It works!!

class GRUNet(nn.Module):
    def __init__(self,in_size,h_size,n_layers,fc_out,out_size,dropout=0.5):
        super(GRUNet, self).__init__()   
        self.gru = nn.GRU(input_size=in_size,hidden_size=h_size,num_layers=n_layers,dropout=dropout,bias=False)
        self.fc = nn.Linear(in_features=h_size,out_features=fc_out,bias=False)
        self.relu = nn.ReLU(inplace=True)
        self.out = nn.Linear(in_features=fc_out,out_features=out_size,bias=False)
        self.tanh = nn.Tanh()        
    def forward(self, x, hidden):
        out, hidden = self.gru(x, hidden)
        x = self.fc(out)
        #x = self.fc(x)
        x = self.relu(x)
        x = self.out(x)
        x = self.tanh(x)
        return x, hidden
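
For completeness, a quick forward-pass sanity check of the fixed model might look like this (a sketch using the hyperparameters from the post, run on the CPU for simplicity; move the model and tensors to cuda:0 as in the training code if a GPU is available):

```python
import torch

# Hypothetical sanity check, assuming GRUNet is defined as above and the post's sizes.
model = GRUNet(in_size=3, h_size=50, n_layers=3, fc_out=20, out_size=1)

x = torch.randn(100, 16, 3)        # (seq, batch, in_size)
hidden = torch.zeros(3, 16, 50)    # (n_layers, batch, h_size) - a single tensor for the GRU

pred, hidden = model(x, hidden)
print(pred.shape, hidden.shape)    # torch.Size([100, 16, 1]) torch.Size([3, 16, 50])
```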