Hello.
I’ve been working on a project for the past months, and my current goal is to be able to make the i3d network work. I have picked the unofficial implementation in pytorch(the original one was in keras if I recall correctly).
I’m working on google Colab with a subset of the real dataset, and the purpose of this would be to see if everything works first.
At the moment I’m getting a CUDA error, but I have tried all the solutions I have readed around the internet(both here and stack overflow) and none of them seemed to be able to help.
One of the most suggested approach to debug was to run the code with my CPU instead of the GPU and the error should be revealed for which one it is really, but if I do that it never pops up, so I think that the main problem is something related to the GPU.
Also another recurrent solution is to take a look closely to the target labels, but I printed them(I left the code commented) and they are fine.
To conclude, as you can see, I added also the os.environ[‘CUDA_LAUNCH_BLOCKING’] = “1” line but nothing more is showed.
I’m honestly blocked, so if any expert here can help me with this, it would be awesome. I have not provided the whole code because the network is a super huge code, hope it’s not a problem
Hoping to solve this issue,
I wish you all a pleasent week!
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"
dataset_path = "/mypath/temporalReduced_dataset"
annotation_path = dataset_path+"/annotations.txt"
device = torch.device("cuda:0")
def run(init_lr=0.1, max_steps=64e3, mode='rgb', path="my/path/reduced_dataset", annotation_path ="my/path/reduced_dataset/annotations.txt", num_segment= 16,frames_per_segment=1, batch_size=4, save_model=''"):
# setup dataset
preprocess = transforms.Compose([
ImglistToTensor(), # list of PIL images to (FRAMES x CHANNELS x HEIGHT x WIDTH) tensor
transforms.CenterCrop(224), # image batch, center crop to square 299x299
transforms.RandomHorizontalFlip(),
])
dataset = VideoFrameDataset(
root_path=path,
annotationfile_path=annotation_path,
num_segments=num_segment,
frames_per_segment=frames_per_segment,
imagefile_template='img_{:07d}.jpg',
transform=preprocess,
test_mode=False)
train_dataset,validation_dataset=torch.utils.data.random_split(dataset, [int(70*len(dataset)/100),int(30*len(dataset)/100)])
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True) # 4 core nella cpu
valid_loader = torch.utils.data.DataLoader(validation_dataset, batch_size=batch_size, shuffle=False)
# labels_ = set()
# for i, (inputs, targets) in enumerate(train_loader):
# for elem in list(targets):
# labels_.add(elem.item())
# print(labels_)
class_path= glob.glob(dataset_path+"/*")
classes = []
for path in class_path:
classes.append(path.split('/')[-1] )
classes = sorted(classes)
classes.pop()
num_classes=len(classes)
# Report split sizes
print('Training set has {} instances'.format(len(train_dataset)))
print('Validation set has {} instances'.format(len(validation_dataset)))
dataloaders = {'train': train_loader, 'val': valid_loader}
datasets = {'train': train_dataset, 'val': validation_dataset}
# setup the model
if mode == 'flow':
i3d = InceptionI3d(400, in_channels=2)
i3d.load_state_dict(torch.load('models/flow_imagenet.pt'))
else:
i3d = InceptionI3d(400, in_channels=3)
i3d.load_state_dict(torch.load('/my/path/rgb_imagenet.pt'))
i3d.replace_logits(num_classes)
#i3d.load_state_dict(torch.load('/ssd/models/000920.pt'))
#print(i3d)
i3d.to(device) <-- HERE IS THE ERROR ACCORDING TO COLAB
i3d = nn.DataParallel(i3d)
lr = init_lr
optimizer = optim.SGD(i3d.parameters(), lr=lr, momentum=0.9, weight_decay=0.0000001)
lr_sched = optim.lr_scheduler.MultiStepLR(optimizer, [300, 1000])
num_steps_per_update = 4 # accum gradient
steps = 0
# train it
while steps < max_steps:#for epoch in range(num_epochs):
print('Step {}/{}'.format(steps, max_steps))
print('-' * 10)
# Each epoch has a training and validation phase
for phase in ['train', 'val']:
if phase == 'train':
i3d.train(True)
else:
i3d.train(False) # Set model to evaluate mode
tot_loss = 0.0
tot_loc_loss = 0.0
tot_cls_loss = 0.0
num_iter = 0
optimizer.zero_grad()
# Iterate over data.
for data in dataloaders[phase]: #train_dataloader, validation_dataloader
num_iter += 1
# get the inputs
inputs, labels = data
inputs = Variable(inputs.cuda())
inputs = torch.transpose(inputs,1,2)
t = inputs.size(2)
labels = Variable(labels.cuda())
per_frame_logits = i3d(inputs) #torch.Size([4, 3, 1, 4, 4])
print(per_frame_logits.shape)
per_frame_logits = F.softmax(per_frame_logits)
# compute localization loss
loc_loss = F.cross_entropy(torch.squeeze(per_frame_logits), labels)
tot_loc_loss += loc_loss.item()
loc_loss.backward()
if num_iter == num_steps_per_update and phase == 'train':
steps += 1
num_iter = 0
optimizer.step()
optimizer.zero_grad()
lr_sched.step()
if steps % 10 == 0:
print('{} Loc Loss: {:.4f} Cls Loss: {:.4f} Tot Loss: {:.4f}'.format(phase, tot_loc_loss/(10*num_steps_per_update), tot_cls_loss/(10*num_steps_per_update), tot_loss/10))
# save model
torch.save(i3d.module.state_dict(), save_model+str(steps).zfill(6)+'.pt')
tot_loss = tot_loc_loss = tot_cls_loss = 0.
if phase == 'val':
print('{} Loc Loss: {:.4f} Cls Loss: {:.4f} Tot Loss: {:.4f}'.format(phase, tot_loc_loss/num_iter, tot_cls_loss/num_iter, (tot_loss*num_steps_per_update)/num_iter))
if __name__ == '__main__':
# need to add argparse
run(path=dataset_path, annotation_path=annotation_path)