def forward(self, x, mask, hx=None, cx=None, words=None):
# Release cache
# torch.cuda.empty_cache()
# Check device and move tensors if necessary
# Define model and input_data variables
model = Att_BLSTM(embedding_matrix, class_num, device=self.device)
input_data = torch.tensor(X_train_vectorized, dtype=torch.float32).to(self.device)
device = torch.device(“cuda:0” if torch.cuda.is_available() else “cpu”)
model = model.to(device)
input_data = input_data.to(device)
model.embedding.weight = model.embedding.weight.to(device)
# Process input data
text = torch.tensor(X_train_vectorized, dtype=torch.float32).to(device) # Move text to the device
words = text.unsqueeze(0).view(1, -1, 5000) # Adjust words shape
assert words is not None
# Process input data
words = words.view(-1, 753800, 100) # Reshape to the correct shape
# Reshape input data to shape (7538, 100, 5000)
# words = words.view(7538, 100, 5000)
# Use an alternative reshaping method
batch_size = 753
words = words.reshape(batch_size, 100, 5000)
# Process each batch separately
outputs_list = []
for batch in words:
print("Original size:", text.size())
print("Reshaped size:", x.size())
# print("Shape of batch before transpose:", batch.shape)
x = batch.transpose(0, 1).unsqueeze(1).view(100, -1, 753800)
# print("Shape of batch after transpose:", batch.shape)
batch_size = batch.size(0)
x = batch.view(batch_size, 15076)
# Assuming you want to add an extra dimension at the beginning
x = x.unsqueeze(0)
# Get the batch size
if words is not None:
words = words.unsqueeze(0)
else:
words = None
# Reshape the input
# Process input data
if words is not None:
# Transpose the word embedding layer output to shape (seq_len, batch_size, embedding_dim)
words = words.transpose(0, 1)
# Concatenate the word embedding layer output with the hidden layer, making it shape (seq_len, batch_size, embedding_dim + hidden_dim)
# Here, a bidirectional recurrent neural network is used, so the shape of the hidden layer output is (seq_len, batch_size, hidden_size * 2)
# It needs to be transposed to shape (seq_len, batch_size, hidden_size * 2)
x = torch.cat((words, self.lstm(words)[0].transpose(0, 1)), 2)
else:
x = x.view(-1, 753800, 15076)
# Handle the case when words is None
if words is None:
print("Words is None. Returning default value.")
return torch.zeros(1, x.size(1), self.hidden_dim * 2) # Adjust the size as needed
# Process attention mechanism
# Convert x to a tensor of shape (batch_size, 100, 7538),
# so that the LSTM layer can process it
# Reshape input data to the correct shape
# Reshape input data to shape (753800, 15076)
x = x.view(-1, 15076)
batch_size = x.size(0)
# Process attention mechanism
# Convert x to a tensor of shape (batch_size, seq_len, hidden_dim)
x = x.view(batch_size // 100, 100, x.size(2))
print("Original size:", x.size())
# Use attention mechanism to weight the output and get the final output
x = self.attention(x)
# Reshape the output to the original shape
x = x.view(batch_size, 15076)
return x
# Compute attention weights
attention_weights = self.att_weight(x).squeeze(2)
attention_weights = F.softmax(attention_weights, dim=1)
# Apply attention weights
weighted_inputs = torch.bmm(attention_weights.unsqueeze(1), x).squeeze(1)
# Pass attention state to the fully connected layer
# Get the probability distribution of predicted categories
output = self.attention_layer(weighted_inputs, mask)
# Reshape the output to the original shape
output = output.transpose(0, 1)
return output
# Initialize hidden layer
if hx is None:
hx = torch.nn.init.uniform_(torch.empty(1, 1, self.hidden_dim), -0.1, 0.1).to(dtype=torch.float32)
hx = hx.to(self.device)
elif hx.dim() != 3:
hx = hx.view(1, 1, hx.shape[0])
if cx is None:
cx = torch.nn.init.uniform_(torch.empty(1, 1, self.hidden_dim), -0.1, 0.1)
cx = cx.to(self.device)
elif cx.dim() != 3:
cx = cx.view(1, 1, cx.shape[0])
# Assuming you want to add an extra dimension at the beginning
words = words.unsqueeze(0)
# Embedding layer
emb = self.embedding(words.long())
# Remove additional dimension
emb = emb.squeeze(3)
# Permute to match LSTM's expected input shape
emb = emb.permute(2, 0, 1)
h, (_, _) = self.lstm(emb, (hx, cx))
return h
# Handle the case when words is None
if words is None:
print("Words is None. Returning default value.")
return torch.zeros(1, x.size(1), self.hidden_dim * 2) # Adjust the size as needed
# LSTM layer
# **Handle sequence length**
# Compute the length of the sequence
# Pack the input sequence for the LSTM to handle
seq_len, batch_size = emb.size(1), emb.size(0)
lengths = torch.full((batch_size,), seq_len, dtype=torch.int32) # Fill with the same sequence length
emb = nn.utils.rnn.pack_padded_sequence(emb, lengths, batch_first=True, enforce_sorted=False)
# **Handle LSTM layer**
# Pass the embedded sequence to the LSTM layer
# Get hidden state and output
h, (_, _) = self.lstm(emb, (hx, cx))
# ToDo: Handle attention mechanism**
# Compute attention weights for each hidden state
# Apply attention weights to hidden states
h = h.squeeze(dim=0)
alpha = self.att_weight.matmul(h.unsqueeze(1))
alpha = F.softmax(alpha, dim=1)
h_att = torch.bmm(alpha, h).squeeze(dim=1)
“This part of the code is causing an error, and it has been bothering me for many days. I have tried many methods to solve it, but none of them worked. I don’t know how to fix this. If you know, please help me. I would be very grateful.”
Error:
Traceback (most recent call last):
File “E:\pythonProject\Att-BLSTM -Project\LSTM-Att-BLSTM\Att-BLSTMCode\Att-BLSTM.py”, line 488, in
train_attention_blstm(attention_blstm_model, train_loader, optimizer, criterion, device)
File “E:\pythonProject\Att-BLSTM -Project\LSTM-Att-BLSTM\Att-BLSTMCode\Att-BLSTM.py”, line 365, in train_attention_blstm
outputs = model(words, mask)
File “D:\Anaconda\envs\pytorch\lib\site-packages\torch\nn\modules\module.py”, line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File “D:\Anaconda\envs\pytorch\lib\site-packages\torch\nn\modules\module.py”, line 1527, in _call_impl
return forward_call(*args, **kwargs)
File “E:\pythonProject\Att-BLSTM -Project\LSTM-Att-BLSTM\Att-BLSTMCode\Att-BLSTM.py”, line 103, in forward
words = words.reshape(batch_size, 100, 5000)
RuntimeError: shape ‘[753, 100, 5000]’ is invalid for input of size 75380000