I have concatenated three input layers using torch.concat(). I want to predict a binary sentiment label (0 or 1). The output layer is a single nn.Linear() whose output has shape [64, 1], where 64 is the number of sentences per batch and 1 is the target (0 or 1).
When I try to pass the concatenated layer through this single output layer, I receive the following error:
ValueError: Using a target size (torch.Size([64, 1])) that is different to the input size (torch.Size([192, 1])) is deprecated. Please ensure they have the same size.
You can easily see that 192 / 3 = 64: the three input layers, each of shape [64, 50] (50 being the hidden size suggested for this trial), are concatenated along dim=0 into a [192, 50] tensor, which the output layer then maps to [192, 1] instead of the expected [64, 1]. A minimal sketch of the shape arithmetic is shown below.
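Here is that shape arithmetic in isolation (64 and 50 simply stand in for this trial's batch size and hidden size):

import torch

batch_size, hidden = 64, 50
avg = torch.randn(batch_size, hidden)
mx = torch.randn(batch_size, hidden)
mn = torch.randn(batch_size, hidden)

# dim=0 stacks the three batches on top of each other
print(torch.concat([avg, mx, mn], dim=0).shape)  # torch.Size([192, 50])

# dim=1 keeps the batch dimension and widens the features instead
print(torch.concat([avg, mx, mn], dim=1).shape)  # torch.Size([64, 150])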
How can I pass the concatenated layer into a single output layer and predict the outcome using all three pieces of information?
Below is my code:
import torch
import torch.nn as nn

class SentimentClassifier_trivial_model_complexity(nn.Module):
    def __init__(self, trial, non_contributive_token, vocab_size, vector_length, output_dim, activation, use_pretrained_embeddings, embedding_matrix=None, embedding_dimension=None):
        # Constructor
        super(SentimentClassifier_trivial_model_complexity, self).__init__()
        self.layers = []
        self.use_pretrained_embeddings = use_pretrained_embeddings
        padding_idx = non_contributive_token
        linear_hidden_units = trial.suggest_int("input_hidden_units", 8, 128)
        # embedding layer
        if self.use_pretrained_embeddings:
            self.embedding = nn.Embedding.from_pretrained(embedding_matrix, freeze=True, padding_idx=padding_idx)
            # input layer
            self.input_layer = nn.Linear(embedding_dimension, linear_hidden_units)
        else:
            embedding_dimension = trial.suggest_int("embedding_dimension", 10, 100)
            self.embedding = nn.Embedding(vocab_size, embedding_dimension, padding_idx=padding_idx)
            self.embedding.weight.requires_grad = True
            # input layer
            self.input_layer = nn.Linear(embedding_dimension * vector_length, linear_hidden_units)
        # flatten layer
        self.flatten_layer = nn.Flatten()
        # Specify number of additional layers
        self.n_layers = 0
        # output layer
        self.output_layer = nn.Linear(linear_hidden_units, output_dim)
        # activation function
        if activation.lower() == "relu":
            self.activation = nn.ReLU()
        else:
            self.activation = nn.Tanh()
        # activation of output layer
        self.activation_output = nn.Sigmoid()
        self.apply(self._init_weights)

    def _init_weights(self, module):
        if isinstance(module, nn.Linear):
            module.weight.data.normal_(mean=0.0, std=1.0)
            if module.bias is not None:
                module.bias.data.zero_()
        elif isinstance(module, nn.Embedding):
            module.weight.data.normal_(mean=0.0, std=1.0)
            if module.padding_idx is not None:
                module.weight.data[module.padding_idx].zero_()

    def forward(self, text):
        embedded = self.embedding(text)
        if self.use_pretrained_embeddings:
            embedded_average = torch.mean(embedded, dim=1)
            embedded_max = torch.max(embedded, dim=1)[0]
            embedded_min = torch.min(embedded, dim=1)[0]
            # use of average embeddings transformation
            input_layer_average = self.input_layer(embedded_average)
            input_layer_average = self.activation(input_layer_average)
            # use of max embeddings transformation
            input_layer_max = self.input_layer(embedded_max)
            input_layer_max = self.activation(input_layer_max)
            # use of min embeddings transformation
            input_layer_min = self.input_layer(embedded_min)
            input_layer_min = self.activation(input_layer_min)
        else:
            embedded = self.flatten_layer(embedded)
        # concatenation of the 3 input layers
        input_layer = torch.concat([input_layer_average, input_layer_max, input_layer_min], dim=0)
        input_layer = self.activation(input_layer)
        # print(input_layer.shape)  # [192, hidden] here; the output layer then yields [192, 1] vs the [64, 1] target
        output_layer = self.output_layer(input_layer)
        output_layer = self.activation_output(output_layer)
        return output_layer
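For completeness, the ValueError is raised by the loss computation rather than by the linear layer itself. A minimal, self-contained reproduction of the mismatch (assuming a BCE-style loss, which matches the sigmoid output; the tensors here are illustrative stand-ins):

import torch
import torch.nn as nn

criterion = nn.BCELoss()
predictions = torch.sigmoid(torch.randn(192, 1))  # what the model currently emits after the dim=0 concat
labels = torch.randint(0, 2, (64, 1)).float()     # the actual batch of targets
loss = criterion(predictions, labels)             # raises the ValueError quoted above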