Regarding the dataloader size and batch size during training

I have a network designed as follows:

class CNN_ForecastNet(nn.Module):
    def __init__(self):
        super(CNN_ForecastNet,self).__init__()
        self.conv1d = nn.Conv1d(3,64,kernel_size=1)
        self.relu = nn.ReLU(inplace=True)
        self.fc1 = nn.Linear(64*2,50)
        self.fc2 = nn.Linear(50,1)
        
    def forward(self,x):
        x = self.conv1d(x)
        x = self.relu(x)
        x = x.view(-1)
        print('x size',x.size())
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        
        return x

def Train():
    
    running_loss = .0
    
    model.train()
    
    for idx, (inputs,labels) in enumerate(train_loader):
        inputs = inputs.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        print('--- idx ---',idx)
        print('inputs ',inputs)
        preds = model(inputs.float())
        loss = criterion(preds,labels.float())
        loss.backward()
        optimizer.step()
        running_loss += loss
        
    train_loss = running_loss/len(train_loader)
    train_losses.append(train_loss.detach().numpy())
    
    print(f'train_loss {train_loss}')

However, for the given training data set, the dataset size is not an integer multiple of the batch size, so the last batch is smaller. The printed output for the last two iterations is as follows:

--- idx --- 902
inputs  tensor([[[9.],
         [9.],
         [9.]],

        [[9.],
         [9.],
         [0.]]], device='cuda:0', dtype=torch.float64)
x size torch.Size([128])
--- idx --- 903
inputs  tensor([[[9.],
         [0.],
         [6.]]], device='cuda:0', dtype=torch.float64)
x size torch.Size([64])

This causes an error at the fc1 layer, i.e. x = self.fc1(x) in the network definition. The error message is shown below. I can see the error, but I am not clear on how to correct it. Thanks. The full code is attached at the end.

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-1-4d8996b2f4ab> in <module>
    100 for epoch in range(epochs):
    101     print('epochs {}/{}'.format(epoch+1,epochs))
--> 102     Train()
    103     gc.collect()

<ipython-input-1-4d8996b2f4ab> in Train()
     82         print('--- idx ---',idx)
     83         print('inputs ',inputs)
---> 84         preds = model(inputs.float())
     85         loss = criterion(preds,labels.float())
     86         loss.backward()

~\Anaconda3\envs\pytorchenv\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
    725             result = self._slow_forward(*input, **kwargs)
    726         else:
--> 727             result = self.forward(*input, **kwargs)
    728         for hook in itertools.chain(
    729                 _global_forward_hooks.values(),

<ipython-input-1-4d8996b2f4ab> in forward(self, x)
     57         x = x.view(-1)
     58         print('x size',x.size())
---> 59         x = self.fc1(x)
     60         x = self.relu(x)
     61         x = self.fc2(x)

~\Anaconda3\envs\pytorchenv\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
    725             result = self._slow_forward(*input, **kwargs)
    726         else:
--> 727             result = self.forward(*input, **kwargs)
    728         for hook in itertools.chain(
    729                 _global_forward_hooks.values(),

~\Anaconda3\envs\pytorchenv\lib\site-packages\torch\nn\modules\linear.py in forward(self, input)
     91 
     92     def forward(self, input: Tensor) -> Tensor:
---> 93         return F.linear(input, self.weight, self.bias)
     94 
     95     def extra_repr(self) -> str:

~\Anaconda3\envs\pytorchenv\lib\site-packages\torch\nn\functional.py in linear(input, weight, bias)
   1690         ret = torch.addmm(bias, input, weight.t())
   1691     else:
-> 1692         output = input.matmul(weight.t())
   1693         if bias is not None:
   1694             output += bias

RuntimeError: mat1 dim 1 must match mat2 dim 0

The full code:

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

from numpy import array
import torch
import gc
import torch.nn as nn
from tqdm import tqdm_notebook as tqdm
from torch.utils.data import Dataset,DataLoader

solar_power = pd.read_csv('PV_Elec_Gas3.csv').rename(columns={'date':'timestamp'}).set_index('timestamp')

train_set = solar_power[:'8/10/2016']

def split_sequence(sequence, n_steps):
    x, y = list(), list()
    for i in range(len(sequence)):
        
        end_ix = i + n_steps
        
        if end_ix > len(sequence)-1:
            break
        seq_x, seq_y = sequence[i:end_ix], sequence[end_ix]
        x.append(seq_x)
        y.append(seq_y)
    return array(x), array(y)


n_steps = 3
train_x,train_y = split_sequence(train_set.loc[:,"kWh electricity/day"].values,n_steps)

class ElecDataset(Dataset):
    def __init__(self,feature,target):
        self.feature = feature
        self.target = target
    
    def __len__(self):
        return len(self.feature)
    
    def __getitem__(self,idx):
        item = self.feature[idx]
        label = self.target[idx]
        
        return item,label

class CNN_ForecastNet(nn.Module):
    def __init__(self):
        super(CNN_ForecastNet,self).__init__()
        self.conv1d = nn.Conv1d(3,64,kernel_size=1)
        self.relu = nn.ReLU(inplace=True)
        self.fc1 = nn.Linear(64*2,50)
        self.fc2 = nn.Linear(50,1)
        
    def forward(self,x):
        x = self.conv1d(x)
        x = self.relu(x)
        x = x.view(-1)
        print('x size',x.size())
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        
        return x

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = CNN_ForecastNet().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)
criterion = nn.MSELoss()

train_losses = []

def Train():
    
    running_loss = .0
    
    model.train()
    
    for idx, (inputs,labels) in enumerate(train_loader):
        inputs = inputs.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        print('--- idx ---',idx)
        print('inputs ',inputs)
        preds = model(inputs.float())
        loss = criterion(preds,labels.float())
        loss.backward()
        optimizer.step()
        running_loss += loss
        
    train_loss = running_loss/len(train_loader)
    train_losses.append(train_loss.detach().numpy())
    
    print(f'train_loss {train_loss}')
    

train = ElecDataset(train_x.reshape(train_x.shape[0],train_x.shape[1],1),train_y)
train_loader = torch.utils.data.DataLoader(train,batch_size=2,shuffle=False)

epochs = 1
for epoch in range(epochs):
    print('epochs {}/{}'.format(epoch+1,epochs))
    Train()
    gc.collect()

This is strange, because fully connected layers normally do not depend on the batch size during training: the data usually has a shape like (N, D), where N is the batch dimension and D is the hidden dimension or sequence length.
What is the purpose of the x.view(-1)? Is the data meant to be interpreted as a single time series? Otherwise, mixing data across the batch dimension is strange.
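To make the dependence on the batch size concrete, here is a minimal standalone reproduction of the two printed cases (the layer sizes mirror your model; the random tensors only stand in for your data):

import torch
import torch.nn as nn

conv1d = nn.Conv1d(3, 64, kernel_size=1)

full_batch = torch.randn(2, 3, 1)   # a regular batch of two samples, shape (N=2, 3, 1)
last_batch = torch.randn(1, 3, 1)   # the leftover single sample at the end of the epoch

print(conv1d(full_batch).view(-1).shape)  # torch.Size([128]) -> matches fc1's in_features of 64*2
print(conv1d(last_batch).view(-1).shape)  # torch.Size([64])  -> does not match 128, hence the RuntimeError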

You might want to consider whether the batch dimension used here actually corresponds to the meaning of a batch in the dataloader, since samples within a batch are typically not mixed (with the exception of layers like batchnorm). If you decide that the use of the batch is actually correct here, you can try common techniques such as simply padding the input in the edge cases where the data does not fill the expected input size.
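If you keep the current batching, the most direct route is to flatten only the non-batch dimensions, so that each sample produces its own prediction regardless of the batch size. A minimal sketch, assuming one forecast per (3, 1) sample is what you want; note that fc1 then takes 64 input features rather than 64*2:

import torch.nn as nn

class CNN_ForecastNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1d = nn.Conv1d(3, 64, kernel_size=1)
        self.relu = nn.ReLU(inplace=True)
        self.fc1 = nn.Linear(64, 50)       # 64 features per sample, not 64*2
        self.fc2 = nn.Linear(50, 1)

    def forward(self, x):                  # x: (N, 3, 1)
        x = self.relu(self.conv1d(x))      # -> (N, 64, 1)
        x = x.view(x.size(0), -1)          # -> (N, 64); the batch dimension N is preserved
        x = self.relu(self.fc1(x))         # -> (N, 50)
        return self.fc2(x)                 # -> (N, 1), one forecast per sample

Alternatively, if the 64*2 design really is intended, DataLoader's drop_last=True argument simply skips the final incomplete batch, at the cost of discarding those samples.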

Thank you for the response. I should change x.view(-1) to x.flatten(start_dim=1) (and adjust fc1 to take 64 input features) so that the batch dimension is preserved.
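For clarity, the default start_dim of flatten would reproduce exactly the same problem as view(-1), so the start_dim=1 argument is the part that matters:

import torch

x = torch.randn(2, 64, 1)               # conv output for a batch of two samples
print(x.flatten().shape)                # torch.Size([128]), same as view(-1): still mixes the batch
print(x.flatten(start_dim=1).shape)     # torch.Size([2, 64]): batch dimension preserved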