Memory allocation issue

import torch
import torch.nn as nn
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

class ChannelAttention(nn.Module):
    def __init__(self, in_channels, reduction_ratio=16):
        super(ChannelAttention, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.max_pool = nn.AdaptiveMaxPool2d(1)
        self.fc1 = nn.Conv2d(in_channels, in_channels // reduction_ratio, kernel_size=1, stride=1, padding=0)
        self.relu = nn.ReLU()
        self.fc2 = nn.Conv2d(in_channels // reduction_ratio, in_channels, kernel_size=1, stride=1, padding=0)

    def forward(self, x):
        avg_out = self.fc2(self.relu(self.fc1(self.avg_pool(x))))
        max_out = self.fc2(self.relu(self.fc1(self.max_pool(x))))
        out = avg_out + max_out
        return torch.sigmoid(out) * x

class SpatialAttention(nn.Module):
    def __init__(self, kernel_size=7):
        super(SpatialAttention, self).__init__()
        assert kernel_size in (3, 7), "kernel size must be 3 or 7"
        padding = 3 if kernel_size == 7 else 1
        self.conv = nn.Conv2d(2, 1, kernel_size, padding=padding)

    def forward(self, x):
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        out = torch.cat([avg_out, max_out], dim=1)
        out = self.conv(out)
        return torch.sigmoid(out) * x

class DualAttentionModule(nn.Module):
    def __init__(self, in_channels):
        super(DualAttentionModule, self).__init__()
        self.channel_attention = ChannelAttention(in_channels)
        self.spatial_attention = SpatialAttention()

    def forward(self, x):
        out = self.channel_attention(x) * x
        out = self.spatial_attention(out) * out
        return out

class DualAttentionNetwork(nn.Module):
    def __init__(self, num_classes=10):
        super(DualAttentionNetwork, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1)
        self.relu = nn.ReLU()
        self.dual_attention1 = DualAttentionModule(64)
        self.conv2 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.dual_attention2 = DualAttentionModule(128)
        self.fc1 = nn.Linear(128 * 4 * 4, 1024)  # Adjusted the input size for fc1
        self.fc2 = nn.Linear(1024, num_classes)

    def forward(self, x):
        out = self.conv1(x)
        out = self.relu(out)
        out = self.dual_attention1(out)
        out = self.conv2(out)
        out = self.relu(out)
        out = self.pool(out)
        out = self.dual_attention2(out)
        out = out.view(out.size(0), -1)
        out = self.fc1(out)
        out = self.relu(out)
        out = self.fc2(out)
        return out

# Set device (GPU or CPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define hyperparameters
num_epochs = 10
batch_size = 2
learning_rate = 0.001

# Load the dataset and apply transformations
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

train_dataset = datasets.ImageFolder(root=r'C:\Users\tmlan\OneDrive\Desktop\MODIFIED\UAVWeedSegmentation-main\training-data\trainval', transform=transform)
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)

# Initialize the Dual Attention Network
model = DualAttentionNetwork().to(device)

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
total_steps = len(train_loader)
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Print training progress
        if (i + 1) % 100 == 0:
            print(f"Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{total_steps}], Loss: {loss.item():.4f}")

# Save the trained model
torch.save(model.state_dict(), "dual_attention_model.pt")
I am getting this error:
RuntimeError: mat1 and mat2 shapes cannot be multiplied (2x1605632 and 2048x1024)

This shape mismatch is not a memory allocation issue; it is raised in self.fc1, which expects 2048 input features (128 * 4 * 4) while the flattened activation has 1605632.
Either set its in_features to 1605632 or reduce the incoming activation so that it flattens to 2048 features.
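One quick way to see where the 1605632 comes from is to replay the layers up to the flatten and print the shapes (a debugging sketch, assuming the model definition and the 224x224 resize above):

model = DualAttentionNetwork()
x = torch.randn(2, 3, 224, 224)  # matches batch_size=2 and Resize((224, 224))
with torch.no_grad():
    out = model.relu(model.conv1(x))    # (2, 64, 224, 224): stride 1, padding 1 keeps the size
    out = model.dual_attention1(out)    # the attention modules do not change the shape
    out = model.relu(model.conv2(out))  # (2, 128, 224, 224)
    out = model.pool(out)               # (2, 128, 112, 112): the only downsampling step
    out = model.dual_attention2(out)
print(out.view(out.size(0), -1).shape)  # torch.Size([2, 1605632]), since 128 * 112 * 112 = 1605632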

I am not able to understand this. Could you please tell me which part of the code needs to be modified so that it does not cause this issue?

Replace:

self.fc1 = nn.Linear(128 * 4 * 4, 1024)

with:

self.fc1 = nn.Linear(1605632, 1024)

When I replace it as above, I am again getting an error:
RuntimeError: [enforce fail at C:\actions-runner\_work\pytorch\pytorch\builder\windows\pytorch\c10\core\impl\alloc_cpu.cpp:72] data. DefaultCPUAllocator: not enough memory: you tried to allocate 6576668672 bytes.

In that case, reduce the input size or change the model architecture so that the activation fed to this linear layer is smaller, since you are running out of memory: the requested 6576668672 bytes is exactly fc1's weight matrix (1605632 * 1024 float32 values * 4 bytes, about 6.1 GB).
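One way to do that (a sketch, not the only option) is to pool the feature map down to a fixed 4x4 grid before flattening, so your original fc1 = nn.Linear(128 * 4 * 4, 1024) fits regardless of the input resolution:

# In DualAttentionNetwork.__init__, keep fc1 as nn.Linear(128 * 4 * 4, 1024) and add:
self.adaptive_pool = nn.AdaptiveAvgPool2d((4, 4))

# In forward(), pool before the flatten:
out = self.dual_attention2(out)
out = self.adaptive_pool(out)    # (N, 128, 112, 112) -> (N, 128, 4, 4)
out = out.view(out.size(0), -1)  # (N, 2048), matching fc1's in_features
out = self.fc1(out)

Adding more MaxPool2d stages or resizing the images to something smaller than 224x224 would also work; the goal is simply a much smaller feature map in front of the fully connected layers.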