Hi,
I have an implementation of the C3D network as follows :-
import torch.nn as nn
class C3D(nn.Module):
def __init__(self):
super(C3D, self).__init__()
self.conv1 = nn.Conv3d(3, 64, kernel_size=(3, 3, 3), padding=(1, 1, 1))
self.pool1 = nn.MaxPool3d(kernel_size=(1, 2, 2), stride=(1, 2, 2))
self.conv2 = nn.Conv3d(64, 128, kernel_size=(3, 3, 3), padding=(1, 1, 1))
self.pool2 = nn.MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2))
self.conv3a = nn.Conv3d(128, 256, kernel_size=(3, 3, 3), padding=(1, 1, 1))
self.conv3b = nn.Conv3d(256, 256, kernel_size=(3, 3, 3), padding=(1, 1, 1))
self.pool3 = nn.MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2))
self.conv4a = nn.Conv3d(256, 512, kernel_size=(3, 3, 3), padding=(1, 1, 1))
self.conv4b = nn.Conv3d(512, 512, kernel_size=(3, 3, 3), padding=(1, 1, 1))
self.pool4 = nn.MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2))
self.conv5a = nn.Conv3d(512, 512, kernel_size=(3, 3, 3), padding=(1, 1, 1))
self.conv5b = nn.Conv3d(512, 512, kernel_size=(3, 3, 3), padding=(1, 1, 1))
self.pool5 = nn.MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2), padding=(0, 1, 1))
self.fc6 = nn.Linear(8192, 4096)
self.fc7 = nn.Linear(4096, 4096)
self.fc8 = nn.Linear(4096, 487)
self.dropout = nn.Dropout(p=0.5)
self.relu = nn.ReLU()
self.softmax = nn.Softmax()
def forward(self, x):
h = self.relu(self.conv1(x))
h = self.pool1(h)
h = self.relu(self.conv2(h))
h = self.pool2(h)
h = self.relu(self.conv3a(h))
h = self.relu(self.conv3b(h))
h = self.pool3(h)
h = self.relu(self.conv4a(h))
h = self.relu(self.conv4b(h))
h = self.pool4(h)
h = self.relu(self.conv5a(h))
h = self.relu(self.conv5b(h))
h = self.pool5(h)
h = h.view(-1, 8192)
h = self.relu(self.fc6(h))
h = self.dropout(h)
h = self.relu(self.fc7(h))
h = self.dropout(h)
logits = self.fc8(h)
probs = self.softmax(logits)
return probs
Now I want to remove the last 5 layers . Simply put I want the 4096 vector output from the fc6 layer. I have the pretained weights in a pickle file…
The following is my implementation. :-
import numpy as np
import torch.nn as nn
import torch
from torch.autograd import Variable
from C3D_Model_Main import C3D
class C3D_Model(nn.Module):
activation = {}
def __init__(self):
super(C3D_Model, self).__init__()
net_c3d = C3D()
net_c3d.load_state_dict(torch.load('c3d.pickle')) # loading the pretrained weights pickle here
modules = list(net_c3d.children())[:-5]
# modules = list(net_c3d.children())[:-6]
self.new_model = nn.Sequential(*modules)
def forward(self, x):
"""Extract feature vectors from input images."""
features = self.new_model(x)
return features
def get_activation(name):
activation = {}
def hook(model, input, output):
activation[name] = output.detach()
return hook
def c3Dfeatures(vector):
X = Variable(torch.Tensor(vector))
X = X.cuda()
# get network pretrained model
net = C3D_Model()
net.cuda()
print(net)
output= net(X)
print("ouput type and shape : ", np.shape(output),type(output))
data_reshaped = np.load('112x112_numpy/Arson001_x264.npy')
no_of_groups = data_reshaped.shape[1]
no_of_groups = (int)(np.true_divide(data_reshaped.shape[1], 16))
# print(no_of_groups)
no_of_frames =16
# new_frame_data = np.zeros([1,3,16,240,320])
new_frame_data = np.zeros([1,3,16,112,112])
# print("shape of new_frame_data on init : ",np.shape(new_frame_data))
# print(no_of_groups)
cnt = 0
for i in range(0,no_of_groups * 16,16):
# print(i)
cnt = cnt +1
new_frame_data = data_reshaped[:, i:i + no_of_frames, :, :]
new_frame_data =np.expand_dims(new_frame_data, axis= 0)
prediction = c3Dfeatures(new_frame_data)
the error I get is as follows
line 1354, in linear
output = input.matmul(weight.t())
RuntimeError: size mismatch, m1: [2048 x 4], m2: [8192 x 4096] at /pytorch/aten/src/THC/generic/THCTensorMathBlas.cu:266
I guess there is something wrong where I try to flatten/reshape the tensor after the pool5 layer. Any help/advice would be really nice, as I am stuck at this process.