Hello,
I hope everyone in the community is well.
I’m trying to get my custom data set to return the following:
- Image tensor
- Policy (unique ID)
- numerical columns tensor
- categorical columns tensor
- categorical embedding sizes tuple
I have 1 through 4 coming back correctly. However, when trying to return the embedding size tuples, I am not getting tuples but tensors and I’m not sure why. This throws an error trying to instantiate my model. The below is the code to my data set:
class image_Dataset(Dataset):
'''
image class data set
'''
def __init__(self, data, transform = None):
'''
Args:
------------------------------------------------------------
data = dataframe
image = column in dataframe with absolute path to the image
label = column in dataframe that is the target classification variable
numerical_columns = numerical columns from data
categorical_columns = categorical columns from data
policy = ID variable
'''
self.image_frame = data
self.transform = transform
def __len__(self):
return len(self.image_frame)
def __getitem__(self, idx):
if torch.is_tensor(idx):
idx = idx.tolist()
label = self.image_frame.loc[idx, 'target']
pic = Path(self.image_frame.loc[idx,'location'])
img = Image.open(pic)
policy = self.image_frame.loc[idx, 'policy']
sample = {'image': img, 'policy': policy, 'label':label}
numerical_data = self.image_frame.loc[idx, numerical_columns]
if self.transform:
image = self.transform(img)
for category in categorical_columns:
self.image_frame[category] = self.image_frame[category].astype('category')
categorical_column_sizes = [len(self.image_frame[column].cat.categories) for column in categorical_columns]
categorical_embedding_sizes = [(col_size, min(50, (col_size+1)//2)) for col_size in categorical_column_sizes]
self.image_frame[category] = self.image_frame[category].astype('category').cat.codes.values
categorical_data = self.image_frame.loc[idx, categorical_columns]
categorical_data = torch.tensor(categorical_data, dtype = torch.int64)
numerical_data = torch.tensor(numerical_data, dtype = torch.float)
return image, label, policy, numerical_data, categorical_data, categorical_embedding_sizes
I’m not sure if this is needed, but this is the model object:
class Image_Embedd(nn.Module):
def __init__(self, embedding_size):
'''
Args
---------------------------
embedding_size: Contains the embedding size for the categorical columns
num_numerical_cols: Stores the total number of numerical columns
output_size: The size of the output layer or the number of possible outputs.
layers: List which contains number of neurons for all the layers.
p: Dropout with the default value of 0.5
'''
super(Image_Embedd, self).__init__()
self.all_embeddings = nn.ModuleList([nn.Embedding(ni, nf) for ni, nf in embedding_size])
self.embedding_dropout = nn.Dropout(p)
self.cnn = models.resnet50(pretrained=False)
self.cnn.fc = nn.Linear(self.cnn.fc.in_features, 1000)
self.fc1 = nn.Linear(1000, 1017)
self.fc2 = nn.Linear(1017, 128)
self.fc3 = nn.Linear(128, 2)
#define the foward method
def forward(self, image, x_numerical, x_categorical):
embeddings = []
for i, e in enumerate(self.all_embeddings):
embeddings.append(e(x_categorical[:,i]))
x1 = self.cnn(image)
x2 = numerical_data
x = torch.cat((x1, x2), dim = 1)
x = torch.cat(embeddings, 1)
#x = F.relu(self.fc1(x))
x = self.fc2(x)
#x = F.relu(self.fc2(x))
x = self.fc3(x)
x = F.log_softmax(x)
return x
And this is the error thrown when I try to instantiate:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-671-2a2bc631b997> in <module>
----> 1 combined_model = Image_Embedd(categorical_embedding_sizes)
2
3 combined_model = combined_model.cuda()
<ipython-input-669-6ab41a05624c> in __init__(self, embedding_size)
14 super(Image_Embedd, self).__init__()
15
---> 16 self.all_embeddings = nn.ModuleList([nn.Embedding(ni, nf) for ni, nf in embedding_size])
17 self.embedding_dropout = nn.Dropout(p)
18
<ipython-input-669-6ab41a05624c> in <listcomp>(.0)
14 super(Image_Embedd, self).__init__()
15
---> 16 self.all_embeddings = nn.ModuleList([nn.Embedding(ni, nf) for ni, nf in embedding_size])
17 self.embedding_dropout = nn.Dropout(p)
18
C:\ProgramData\Anaconda3\lib\site-packages\torch\nn\modules\sparse.py in __init__(self, num_embeddings, embedding_dim, padding_idx, max_norm, norm_type, scale_grad_by_freq, sparse, _weight)
95 self.scale_grad_by_freq = scale_grad_by_freq
96 if _weight is None:
---> 97 self.weight = Parameter(torch.Tensor(num_embeddings, embedding_dim))
98 self.reset_parameters()
99 else:
TypeError: new() received an invalid combination of arguments - got (Tensor, Tensor), but expected one of:
* (torch.device device)
* (torch.Storage storage)
* (Tensor other)
* (tuple of ints size, torch.device device)
didn't match because some of the arguments have invalid types: (!Tensor!, !Tensor!)
* (object data, torch.device device)
didn't match because some of the arguments have invalid types: (!Tensor!, !Tensor!)
Thank you for the help.