The size of the input data does not match the desired shape, so the operation cannot create the desired shape

steven1 · December 29, 2023, 2:20pm

# Forward pass posted for review; it appears to belong to the Att_BLSTM module
# that is instantiated a few lines below (TODO confirm).
# NOTE(review): the paste lost the indentation of the method body, so the
# nesting shown here may differ from the real file.
#
# How the parameters are used by the visible code:
#   x     - its incoming value is only read by the "Reshaped size" print in the
#           loop; after that x is overwritten.
#   mask  - only passed to self.attention_layer, in code placed after a `return`.
#   hx/cx - optional initial LSTM states; created with shape
#           (1, 1, self.hidden_dim) when they are None.
#   words - its incoming value is never read: it is overwritten from
#           X_train_vectorized before first use.
def forward(self, x, mask, hx=None, cx=None, words=None):
# Release cache
# torch.cuda.empty_cache()
# Check device and move tensors if necessary
# Define model and input_data variables
# NOTE(review): a brand-new Att_BLSTM is built and moved to the device on every
# forward call, and neither `model` nor `input_data` is used again in this
# function. embedding_matrix, class_num and X_train_vectorized are not
# parameters or locals, so they must come from an enclosing/global scope.
model = Att_BLSTM(embedding_matrix, class_num, device=self.device)
input_data = torch.tensor(X_train_vectorized, dtype=torch.float32).to(self.device)
# NOTE(review): the quotes on the next line are typographic (“ ”), which are
# not valid Python string delimiters; presumably a forum-formatting artifact.
device = torch.device(“cuda:0” if torch.cuda.is_available() else “cpu”)
model = model.to(device)
input_data = input_data.to(device)
model.embedding.weight = model.embedding.weight.to(device)

# Process input data
# NOTE(review): the tensor is built from X_train_vectorized rather than from
# the batch passed in as `x`, so every call processes the same data regardless
# of its arguments.
text = torch.tensor(X_train_vectorized, dtype=torch.float32).to(device)  # Move text to the device
# The reported error says this tensor holds 75,380,000 elements, so
# view(1, -1, 5000) gives (1, 15076, 5000) because 75,380,000 / 5000 = 15,076.
words = text.unsqueeze(0).view(1, -1, 5000)  # Adjust words shape
# This assert cannot fail: `words` was just assigned the result of view().
assert words is not None

# Process input data
# 753800 * 100 = 75,380,000, so this view succeeds and gives (1, 753800, 100).
words = words.view(-1, 753800, 100)  # Reshape to the correct shape
# Reshape input data to shape (7538, 100, 5000)
# NOTE(review): (7538, 100, 5000) would need 3,769,000,000 elements, so the
# commented-out view below could not work on this tensor either.
# words = words.view(7538, 100, 5000)
# Use an alternative reshaping method
# NOTE(review): this is the line the traceback points at. A reshape must keep
# the element count: 753 * 100 * 5000 = 376,500,000, but the tensor has
# 75,380,000 elements, hence "shape '[753, 100, 5000]' is invalid for input of
# size 75380000". Shapes that do fit this element count include (15076, 5000),
# (7538, 2, 5000) and (7538, 100, 100); which one is meaningful depends on what
# the rows and columns of X_train_vectorized represent (not visible here).
# Deriving the dimensions from words.size(), or passing -1 for one dimension,
# avoids hard-coding dataset-specific constants.
batch_size = 753
words = words.reshape(batch_size, 100, 5000)
# Process each batch separately
# NOTE(review): outputs_list is never appended to or returned.
outputs_list = []

# Iterating over a tensor yields its slices along dimension 0.
for batch in words:
    print("Original size:", text.size())
    # On the first iteration `x` is still the argument passed to forward().
    print("Reshaped size:", x.size())

    # print("Shape of batch before transpose:", batch.shape)
    # NOTE(review): the value assigned to x here is overwritten two statements
    # later without being used.
    x = batch.transpose(0, 1).unsqueeze(1).view(100, -1, 753800)
    # print("Shape of batch after transpose:", batch.shape)
    batch_size = batch.size(0)
    # This view only succeeds if the slice has exactly batch_size * 15076
    # elements.
    x = batch.view(batch_size, 15076)

    # Assuming you want to add an extra dimension at the beginning
    x = x.unsqueeze(0)
    # Get the batch size
    # NOTE(review): `words` is always a tensor at this point, so the `else`
    # branch (and, as indented in this paste, everything nested in it) is
    # never executed.
    if words is not None:
        words = words.unsqueeze(0)
    else:
        words = None
        # Reshape the input
        # Process input data
        if words is not None:
            # Transpose the word embedding layer output to shape (seq_len, batch_size, embedding_dim)
            words = words.transpose(0, 1)

            # Concatenate the word embedding layer output with the hidden layer, making it shape (seq_len, batch_size, embedding_dim + hidden_dim)
            # Here, a bidirectional recurrent neural network is used, so the shape of the hidden layer output is (seq_len, batch_size, hidden_size * 2)
            # It needs to be transposed to shape (seq_len, batch_size, hidden_size * 2)
            x = torch.cat((words, self.lstm(words)[0].transpose(0, 1)), 2)
        else:
            x = x.view(-1, 753800, 15076)

        # Handle the case when words is None
        if words is None:
            print("Words is None. Returning default value.")
            return torch.zeros(1, x.size(1), self.hidden_dim * 2)  # Adjust the size as needed

    # Process attention mechanism
    # Convert x to a tensor of shape (batch_size, 100, 7538),
    # so that the LSTM layer can process it
    # Reshape input data to the correct shape
    # Reshape input data to shape (753800, 15076)
    x = x.view(-1, 15076)
    batch_size = x.size(0)
    # Process attention mechanism
    # Convert x to a tensor of shape (batch_size, seq_len, hidden_dim)
    # NOTE(review): x was just made 2-D by view(-1, 15076), so x.size(2) is out
    # of range and raises before the view below can run.
    x = x.view(batch_size // 100, 100, x.size(2))
    print("Original size:", x.size())
    # Use attention mechanism to weight the output and get the final output
    x = self.attention(x)

    # Reshape the output to the original shape
    x = x.view(batch_size, 15076)

    # NOTE(review): returning here ends forward() during the first loop
    # iteration; the remaining statements of the loop body are unreachable.
    return x
    # Compute attention weights
    # Here self.att_weight is called like a module (e.g. a layer object).
    attention_weights = self.att_weight(x).squeeze(2)
    attention_weights = F.softmax(attention_weights, dim=1)

    # Apply attention weights
    weighted_inputs = torch.bmm(attention_weights.unsqueeze(1), x).squeeze(1)

    # Pass attention state to the fully connected layer
    # Get the probability distribution of predicted categories
    output = self.attention_layer(weighted_inputs, mask)
    # Reshape the output to the original shape
    output = output.transpose(0, 1)
    return output

# Initialize hidden layer
# NOTE(review): because the loop above returns on its first iteration, the code
# from here on only runs if that loop executes zero times.
# torch.nn.LSTM expects h_0/c_0 of shape
# (num_layers * num_directions, batch, hidden_size) for batched input, so
# (1, 1, hidden_dim) fits only a single-layer, unidirectional LSTM fed one
# sequence at a time - TODO confirm against how self.lstm is constructed (the
# comments above describe it as bidirectional).
if hx is None:
    hx = torch.nn.init.uniform_(torch.empty(1, 1, self.hidden_dim), -0.1, 0.1).to(dtype=torch.float32)
    hx = hx.to(self.device)
elif hx.dim() != 3:
    hx = hx.view(1, 1, hx.shape[0])

if cx is None:
    cx = torch.nn.init.uniform_(torch.empty(1, 1, self.hidden_dim), -0.1, 0.1)
    cx = cx.to(self.device)
elif cx.dim() != 3:
    cx = cx.view(1, 1, cx.shape[0])

# Assuming you want to add an extra dimension at the beginning
words = words.unsqueeze(0)
# Embedding layer
# The embedding is indexed with words.long(), i.e. the values are truncated to
# integers and used as row indices; presumably they should be token ids rather
# than float features - TODO confirm what X_train_vectorized contains.
emb = self.embedding(words.long())
# Remove additional dimension
emb = emb.squeeze(3)
# Permute to match LSTM's expected input shape
emb = emb.permute(2, 0, 1)
h, (_, _) = self.lstm(emb, (hx, cx))
# NOTE(review): this return makes every statement after it unreachable.
return h
# Handle the case when words is None
if words is None:
    print("Words is None. Returning default value.")
    return torch.zeros(1, x.size(1), self.hidden_dim * 2)  # Adjust the size as needed

# LSTM layer
# **Handle sequence length**
# Compute the length of the sequence
# Pack the input sequence for the LSTM to handle

seq_len, batch_size = emb.size(1), emb.size(0)
lengths = torch.full((batch_size,), seq_len, dtype=torch.int32)  # Fill with the same sequence length
emb = nn.utils.rnn.pack_padded_sequence(emb, lengths, batch_first=True, enforce_sorted=False)

# **Handle LSTM layer**
# Pass the embedded sequence to the LSTM layer
# Get hidden state and output

h, (_, _) = self.lstm(emb, (hx, cx))

# ToDo: Handle attention mechanism**
# Compute attention weights for each hidden state
# Apply attention weights to hidden states

# NOTE(review): here self.att_weight is used as a tensor (.matmul), whereas the
# loop above calls it like a module; only one of the two usages can match its
# actual definition.
h = h.squeeze(dim=0)
alpha = self.att_weight.matmul(h.unsqueeze(1))
alpha = F.softmax(alpha, dim=1)
h_att = torch.bmm(alpha, h).squeeze(dim=1)

This part of the code raises the error shown below, and it has been bothering me for many days. I have tried many ways to solve it, but none of them worked, and I don't know how to fix it. If you know how, please help me; I would be very grateful.

Error:
Traceback (most recent call last):
  File "E:\pythonProject\Att-BLSTM -Project\LSTM-Att-BLSTM\Att-BLSTMCode\Att-BLSTM.py", line 488, in <module>
    train_attention_blstm(attention_blstm_model, train_loader, optimizer, criterion, device)
  File "E:\pythonProject\Att-BLSTM -Project\LSTM-Att-BLSTM\Att-BLSTMCode\Att-BLSTM.py", line 365, in train_attention_blstm
    outputs = model(words, mask)
  File "D:\Anaconda\envs\pytorch\lib\site-packages\torch\nn\modules\module.py", line 1518, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "D:\Anaconda\envs\pytorch\lib\site-packages\torch\nn\modules\module.py", line 1527, in _call_impl
    return forward_call(*args, **kwargs)
  File "E:\pythonProject\Att-BLSTM -Project\LSTM-Att-BLSTM\Att-BLSTMCode\Att-BLSTM.py", line 103, in forward
    words = words.reshape(batch_size, 100, 5000)
RuntimeError: shape '[753, 100, 5000]' is invalid for input of size 75380000