import torch
import torch.nn as nn
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

class ChannelAttention(nn.Module):
    # CBAM-style channel attention: pool over space, weight channels via a shared bottleneck MLP.
    def __init__(self, in_channels, reduction_ratio=16):
        super(ChannelAttention, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.max_pool = nn.AdaptiveMaxPool2d(1)
        # Shared two-layer MLP, implemented as 1x1 convolutions
        self.fc1 = nn.Conv2d(in_channels, in_channels // reduction_ratio, kernel_size=1, stride=1, padding=0)
        self.relu = nn.ReLU()
        self.fc2 = nn.Conv2d(in_channels // reduction_ratio, in_channels, kernel_size=1, stride=1, padding=0)

    def forward(self, x):
        avg_out = self.fc2(self.relu(self.fc1(self.avg_pool(x))))
        max_out = self.fc2(self.relu(self.fc1(self.max_pool(x))))
        out = avg_out + max_out
        return torch.sigmoid(out) * x
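
As a quick sanity check (a sketch with an assumed dummy batch), the module is shape-preserving:

    # Hypothetical check: channel attention returns a tensor of the input's shape.
    ca = ChannelAttention(in_channels=64)
    x = torch.randn(2, 64, 32, 32)  # assumed dummy batch
    print(ca(x).shape)              # torch.Size([2, 64, 32, 32])
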
class SpatialAttention(nn.Module):
    def __init__(self, kernel_size=7):
        super(SpatialAttention, self).__init__()
        assert kernel_size in (3, 7), "kernel size must be 3 or 7"
        padding = 3 if kernel_size == 7 else 1
        # Collapse channels to a 2-channel (avg, max) map, then convolve down to one attention map
        self.conv = nn.Conv2d(2, 1, kernel_size, padding=padding)

    def forward(self, x):
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        out = torch.cat([avg_out, max_out], dim=1)
        out = self.conv(out)
        return torch.sigmoid(out) * x

class DualAttentionModule(nn.Module):
    def __init__(self, in_channels):
        super(DualAttentionModule, self).__init__()
        self.channel_attention = ChannelAttention(in_channels)
        self.spatial_attention = SpatialAttention()

    def forward(self, x):
        # Note: both sub-modules already multiply their input by the attention map,
        # so the extra "* x" / "* out" gates the features a second time.
        out = self.channel_attention(x) * x
        out = self.spatial_attention(out) * out
        return out
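
Since ChannelAttention and SpatialAttention already return sigmoid(...) * input, this forward applies each attention map twice. If single gating was intended (as in the usual CBAM formulation), a minimal alternative sketch, keeping the modules above unchanged, is:

    # Hypothetical alternative: gate once, relying on the multiplication inside each sub-module.
    def forward(self, x):
        out = self.channel_attention(x)    # already returns sigmoid(...) * x
        out = self.spatial_attention(out)  # already returns sigmoid(...) * out
        return out
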
class DualAttentionNetwork(nn.Module):
    def __init__(self, num_classes=10):
        super(DualAttentionNetwork, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1)
        self.relu = nn.ReLU()
        self.dual_attention1 = DualAttentionModule(64)
        self.conv2 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.dual_attention2 = DualAttentionModule(128)
        self.fc1 = nn.Linear(128 * 4 * 4, 1024)  # assumes a 4x4 feature map reaches the flatten below
        self.fc2 = nn.Linear(1024, num_classes)

    def forward(self, x):
        out = self.conv1(x)
        out = self.relu(out)
        out = self.dual_attention1(out)
        out = self.conv2(out)
        out = self.relu(out)
        out = self.pool(out)
        out = self.dual_attention2(out)
        out = out.view(out.size(0), -1)
        out = self.fc1(out)
        out = self.relu(out)
        out = self.fc2(out)
        return out
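
For reference, a quick diagnostic (a sketch, using a dummy batch sized to match the Resize((224, 224)) transform below) traces the feature shape that actually reaches out.view(...):

    # Diagnostic sketch: what reaches the flatten for 224x224 inputs?
    net = DualAttentionNetwork()
    dummy = torch.randn(1, 3, 224, 224)  # assumed dummy input matching the transform below
    feat = net.relu(net.conv1(dummy))
    feat = net.dual_attention1(feat)
    feat = net.pool(net.relu(net.conv2(feat)))
    feat = net.dual_attention2(feat)
    print(feat.shape)                    # torch.Size([1, 128, 112, 112])
    print(feat.flatten(1).shape)         # torch.Size([1, 1605632]), not the 2048 fc1 expects
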
# Set device (GPU or CPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define hyperparameters
num_epochs = 10
batch_size = 2
learning_rate = 0.001

# Load the dataset and apply transformations
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # per-channel stats for RGB input
])
train_dataset = datasets.ImageFolder(root=r'C:\Users\tmlan\OneDrive\Desktop\MODIFIED\UAVWeedSegmentation-main\training-data\trainval', transform=transform)
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)

# Initialize the Dual Attention Network
model = DualAttentionNetwork().to(device)

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
total_steps = len(train_loader)
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Print training progress
        if (i + 1) % 100 == 0:
            print(f"Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{total_steps}], Loss: {loss.item():.4f}")

# Save the trained model
torch.save(model.state_dict(), "dual_attention_model.pt")

I am getting the following error:
RuntimeError: mat1 and mat2 shapes cannot be multiplied (2x1605632 and 2048x1024)
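
The shapes in the message pinpoint the cause: after Resize((224, 224)) and a single 2x2 max-pool, the tensor reaching out.view(...) is 128 x 112 x 112 = 1,605,632 features per image, while fc1 was declared for 128 * 4 * 4 = 2048. One possible fix (a sketch, not the only option) is to pool the feature map down to a fixed 4x4 grid before flattening, so fc1's declared input size holds for any input resolution:

    # Possible fix (sketch): force a fixed 4x4 spatial size before the classifier head.
    class DualAttentionNetwork(nn.Module):
        def __init__(self, num_classes=10):
            super(DualAttentionNetwork, self).__init__()
            self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1)
            self.relu = nn.ReLU()
            self.dual_attention1 = DualAttentionModule(64)
            self.conv2 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
            self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
            self.dual_attention2 = DualAttentionModule(128)
            self.gap = nn.AdaptiveAvgPool2d((4, 4))   # new: always yields 128 x 4 x 4
            self.fc1 = nn.Linear(128 * 4 * 4, 1024)   # now matches the flattened size
            self.fc2 = nn.Linear(1024, num_classes)

        def forward(self, x):
            out = self.dual_attention1(self.relu(self.conv1(x)))
            out = self.dual_attention2(self.pool(self.relu(self.conv2(out))))
            out = self.gap(out)                       # [N, 128, 4, 4]
            out = out.view(out.size(0), -1)           # [N, 2048]
            return self.fc2(self.relu(self.fc1(out)))

Alternatively, keep the architecture as-is and declare fc1 = nn.Linear(128 * 112 * 112, 1024), but that single layer would then hold roughly 1.6 billion weights, so adaptive pooling (or additional downsampling stages) is usually the saner choice.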