Hello all
I have a naive question: is there any difference, in terms of training or the gradient-update process, between a batch-style and a for-loop-style forward pass for data that has a time axis, such as video data?
In my understanding, the batch style works like this:
class network(nn.Module):
    """Batch-style video classifier: every frame of every video in one pass.

    The time axis is folded into the batch axis, so the convolution and the
    linear classifier each run once on all frames. Per-frame scores are then
    regrouped by video and averaged over time.
    """

    def __init__(self):
        # nn.Module must be initialised before sub-modules can be assigned.
        super().__init__()
        self.batch = 16      # nominal batch size (forward derives the real one)
        self.T_length = 15   # number of frames per video
        # NOTE: `...` are placeholders from the original snippet; fill in the
        # real layer arguments before instantiating this class.
        self.conv = nn.Conv2d(...)
        self.cls = nn.Linear(...)

    def forward(self, Input):
        """Return one score vector per video.

        Args:
            Input: tensor of size [batch * T_length, C, H, W], e.g.
                [batch * time_length, 3, 224, 224], with the frames of each
                video stored contiguously.

        Returns:
            Tensor of size [batch, num_class]: frame scores averaged over time.

        Raises:
            ValueError: if the number of frames is not a multiple of
                ``self.T_length``.
        """
        num_frames = Input.shape[0]
        if num_frames % self.T_length != 0:
            raise ValueError(
                f"expected a multiple of {self.T_length} frames, "
                f"got {num_frames}"
            )
        # Derive the batch size from the data so a smaller final batch works.
        num_videos = num_frames // self.T_length

        out = self.conv(Input)            # e.g. [batch * time_length, 128, 28, 28]
        out = out.flatten(start_dim=1)    # [batch * time_length, features]
        score = self.cls(out)             # [batch * time_length, num_class]
        score = score.view(num_videos, self.T_length, score.shape[-1])
        # The score for a single video is the average over all of its frames.
        return score.mean(dim=1)
Whereas the for-loop style works like this:
class network(nn.Module):
    """For-loop-style video classifier: one time step per iteration.

    The input is time-major. Each time step (a batch of frames) goes through
    the convolution and the linear classifier separately. The per-step scores
    are then stacked along a time axis and averaged.
    """

    def __init__(self):
        # nn.Module must be initialised before sub-modules can be assigned.
        super().__init__()
        self.batch = 16      # nominal batch size (forward derives the real one)
        self.T_length = 15   # number of frames per video
        # NOTE: `...` are placeholders from the original snippet; fill in the
        # real layer arguments before instantiating this class.
        self.conv = nn.Conv2d(...)
        self.cls = nn.Linear(...)

    def forward(self, Input):
        """Return one score vector per video.

        Args:
            Input: tensor of size [time_length, batch, C, H, W], e.g.
                [time_length, batch, 3, 224, 224].

        Returns:
            Tensor of size [batch, num_class]: frame scores averaged over time.

        Raises:
            ValueError: if ``Input`` contains no time steps.
        """
        if Input.shape[0] == 0:
            raise ValueError("Input must contain at least one time step")

        # Iterating a tensor yields slices along dim 0, i.e. one
        # [batch, C, H, W] frame batch per time step.
        score = [
            self.cls(self.conv(frame).flatten(start_dim=1))
            for frame in Input
        ]
        # Stack the per-step scores into one tensor.
        score = torch.stack(score, dim=1)  # [batch, time_length, num_class]
        # Average over the time axis.
        return score.mean(dim=1)
# Then optimize