RuntimeError: mat1 and mat2 shapes cannot be multiplied (64x13056 and 153600x2048)

from torchvision.datasets import ImageFolder  # for using our own custom dataset
import cv2  # for reading images
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader, Dataset  # DataLoader lets us batch and iterate over the data
from torchvision.transforms import transforms
import os  # manipulate file paths and collect all the images
from torch import nn  # neural network layers
from torch import optim
import torch

device = 'cuda' if torch.cuda.is_available() else 'cpu'

images = []  # two lists where we will append our images and labels
labels = []
dataPath = r'C:\Users\emman\OneDrive\Desktop\PDD\Plant_leave_diseases_dataset_without_augmentation'
subFolder = os.listdir(dataPath)

for folder in subFolder:  # iterate through the subfolders to collect the images
    label = subFolder.index(folder)
    path = os.path.join(dataPath, folder)  # join the data path with the subfolder name
    for imglist in os.listdir(path):
        image = cv2.imread(os.path.join(path, imglist))
        images.append(image)
        labels.append(label)

class DataPrep(Dataset):  # custom PyTorch Dataset
    def __init__(self, features, labels, transform=None):
        self.features = features
        self.labels = labels
        self.transform = transform

    def __getitem__(self, item):
        image = self.features[item]
        label = self.labels[item]
        if self.transform:
            image = self.transform(image)

        return image, label

    def __len__(self):  # total length of the dataset
        return len(self.labels)

data_trans = transforms.Compose([  # transformation pipeline
    transforms.ToTensor(),
    transforms.Resize((224, 224)),  # resize images to 224 x 224
    transforms.Normalize(mean=[0.5], std=[0.5]),  # normalise
    transforms.RandomHorizontalFlip(),  # random flips for augmentation
    transforms.RandomVerticalFlip()
])
dataset = DataPrep(images, labels, data_trans)  # wrap the data pipeline in the shape we want
data_loader = DataLoader(dataset, batch_size=4, shuffle=True) #this will help shuffle images
data_sample = next(iter(data_loader))
print(data_sample)

class PlantDiseaseModel(nn.Module):  # class for our model
    def __init__(self):
        super(PlantDiseaseModel, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=10, kernel_size=2, stride=2, padding=0)
        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.activation = nn.ReLU()  # introduces non-linearity: converts negative values to 0 and keeps the positives
        self.linear = nn.Linear(64*50*50, 39)
        self.flatten = nn.Flatten()  # converts the multidimensional feature map into a one-dimensional vector

    def forward(self, data):
        data = self.conv1(data)
        data = self.activation(data)
        data = self.maxpool(data)
        data = self.activation(data)
        data = self.flatten(data)
        data = self.linear(data)
        return data
model = PlantDiseaseModel().to(device)
print(model.eval())

learning_rate = 0.001
epoch = 100
optimizer = optim.SGD(model.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()  # loss function

for i in range(epoch):
    for image, target in data_loader:
        image = image.to(device)
        target = target.to(device)
        optimizer.zero_grad()
        output = model(image)
        loss = criterion(output, target)  # how far we are from the target, then adjust the weights
        loss.backward()
        optimizer.step()

    print(f'Epochs: {i+1} out of {epoch} || Loss: {loss.item()}')

I receive this error: RuntimeError: mat1 and mat2 shapes cannot be multiplied (4x200704 and 160000x39)

The data = self.linear(data) call raises the error, since the input activation has 200704 features while 160000 are expected.
I don't know how you've come up with in_features=64*50*50, but you might want to change it to 200704 to fix this error.
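If you don't want to hard-code the flattened size at all, a common pattern is to let PyTorch infer it with nn.LazyLinear, or to compute it once from a dummy input. A minimal sketch along those lines (the layer layout is simplified and not a drop-in replacement for the posted model):

import torch
from torch import nn

class PlantDiseaseModel(nn.Module):
    def __init__(self, num_classes=39):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 10, kernel_size=2, stride=2, padding=0)
        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.activation = nn.ReLU()
        self.flatten = nn.Flatten()
        # Option A: in_features is inferred automatically on the first forward pass
        self.linear = nn.LazyLinear(num_classes)
        # Option B: derive it once from a dummy input at your training resolution
        # with torch.no_grad():
        #     n_features = self.flatten(self.maxpool(self.conv1(torch.zeros(1, 3, 224, 224)))).shape[1]
        # self.linear = nn.Linear(n_features, num_classes)

    def forward(self, x):
        x = self.activation(self.conv1(x))
        x = self.maxpool(x)
        x = self.flatten(x)
        return self.linear(x)

With nn.LazyLinear, run one dummy batch through the model before creating the optimizer so the weight is materialized. Either way, the in_features of the final linear layer has to match whatever the flatten actually produces for your input resolution, which is exactly what the error message is comparing.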


class FeatureExtractor(nn.Module):
    def __init__(self, in_channels=4, output_size=64):  # 4 for stacked frames
        super(FeatureExtractor, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, 256, kernel_size=4, stride=1)
        self.pool1 = nn.MaxPool2d(2, 2)
        self.dropout1 = nn.Dropout(0.2)
        self.conv2 = nn.Conv2d(256, 256, kernel_size=3, stride=1)
        self.pool2 = nn.MaxPool2d(2, 2)
        self.dropout2 = nn.Dropout(0.2)

        # Corrected flattened size
        self.flattened_size = self._calculate_flattened_size(in_channels)
        self.fc1 = nn.Linear(92416, out_features=64)

    def _calculate_flattened_size(self, in_channels):
        with torch.no_grad():
            dummy_input = torch.zeros(1, in_channels, 84, 84)
            x = self.conv1(dummy_input)
            x = self.pool1(x)
            x = self.dropout1(x)
            x = self.conv2(x)
            x = self.pool2(x)
            x = self.dropout2(x)
            print("Shape before flattening:", x.shape)
            x = x.view(x.size(0), -1)  # Flatten the tensor to (batch_size, num_features)
            print("Shape after flattening:", x.shape)
            # Flatten and get the correct size
            return x.view(1, -1).size(1)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = self.pool1(x)
        x = self.dropout1(x)
        x = F.relu(self.conv2(x))
        x = self.pool2(x)
        x = self.dropout2(x)
        x = self.fc1(x)  # Fully connected layer
        return x

class QNetworkCNN(nn.Module):
    def __init__(self, action_space_size):
        super(QNetworkCNN, self).__init__()
        self.feature_extractor = FeatureExtractor()
        self.fc2 = nn.Linear(64, action_space_size)  # Adjust output size if necessary

    def forward(self, state):
        features = self.feature_extractor(state)
        q_values = self.fc2(features)  # Output Q-values
        return q_values

def stack_frames(stacked_frames, state, is_new_episode):
    frame = preprocess_state(state)

    if is_new_episode:
        # New episode: fill the deque with copies of the current frame
        for _ in range(4):
            stacked_frames.append(frame)  # Append 2D frame directly
    else:
        stacked_frames.append(frame)  # Append 2D frame directly

    stacked_state = np.stack(stacked_frames, axis=0)  # Create (4, 84, 84) stacked_state
    return stacked_state, stacked_frames

def dqn_train(agent, n_episodes=2000, max_t=10000, eps_start=1.0, eps_end=0.01, eps_decay=0.99995):
    scores = []                        # list containing scores from each episode
    scores_window = deque(maxlen=100)  # last 100 scores
    eps = eps_start                    # initialize epsilon

    # Initialize stacked_frames inside the function
    stacked_frames = deque([np.zeros((84, 84), dtype=np.uint8) for i in range(4)], maxlen=4)

    for i_episode in tqdm(range(1, n_episodes + 1)):
        state = env.reset()[0]
        stacked_state, stacked_frames = stack_frames(stacked_frames, state, True)  # Initialize stacked frames

        score = 0
        for t in range(max_t):
            action = agent.act(stacked_state, eps)  # Act on stacked state

            next_state, reward, done, _, _ = env.step(action)
            next_stacked_state, stacked_frames = stack_frames(stacked_frames, next_state, False)  # Stack the new frame

            agent.step(stacked_state, action, reward, next_stacked_state, done)  # Learn from stacked states

            stacked_state = next_stacked_state  # Important: update the stacked state
            score += reward
            if done:
                break
        scores_window.append(score)
        scores.append(score)

        # Decay epsilon (exploration factor)
        eps = max(eps_end, eps_decay * eps)

        # Print the average score every 50 episodes
        if i_episode % 50 == 0:
            print('\rEpisode {}\tAverage Score: {:.2f}'.format(i_episode, np.mean(scores_window)))

    # Save the trained network weights
    torch.save(agent.local_dqn.state_dict(), 'checkpoint.pth')
    return scores

agent = DQAgent(state_shape=(4, 84, 84), action_space_size=env.action_space.n, seed=0)
scores = dqn_train(agent)

Shape before flattening: torch.Size([1, 256, 19, 19])
Shape after flattening: torch.Size([1, 92416])
0%| | 0/2000 [00:00<?, ?it/s]

RuntimeError Traceback (most recent call last)
Cell In[35], line 2
1 agent = DQAgent(state_shape=(4, 84, 84), action_space_size=env.action_space.n, seed=0)
----> 2 scores = dqn_train(agent)

Cell In[33], line 22, in dqn_train(agent, n_episodes, max_t, eps_start, eps_end, eps_decay)
18 next_state, reward, done, _, _ = env.step(action)
19 next_stacked_state, stacked_frames = stack_frames(stacked_frames, next_state, False) # Stack the new frame
---> 22 agent.step(stacked_state, action, reward, next_stacked_state, done) # Learn from stacked states
24 stacked_state = next_stacked_state # Important: Update the stacked state
25 score += reward

Cell In[30], line 51, in DQAgent.step(self, state, action, reward, next_state, done)
49 if self.l_step == 0 and len(self.memory) > self.batch_size:
50 experiences = self.memory.sample()
---> 51 self.learn(experiences)
53 # Update target network every UPDATE_EVERY steps
54 self.t_step = (self.t_step + 1) % self.UPDATE_EVERY

Cell In[30], line 68, in DQAgent.learn(self, experiences)
65 dones = torch.tensor(dones, device=device, dtype=torch.bool).unsqueeze(1).to(device)
67 # Get Q-values for the chosen actions from the local network
---> 68 q_expected = self.local_dqn(states).gather(1, actions)
70 # Get max Q-values for the next states from the target network
71 q_targets_next = self.target_dqn(next_states).detach().max(1)[0].unsqueeze(1)

File /opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py:1553, in Module._wrapped_call_impl(self, *args, **kwargs)
1551 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1552 else:
-> 1553 return self._call_impl(*args, **kwargs)

File /opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py:1562, in Module._call_impl(self, *args, **kwargs)
1557 # If we don't have any hooks, we want to skip the rest of the logic in
1558 # this function, and just call forward.
1559 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1560 or _global_backward_pre_hooks or _global_backward_hooks
1561 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1562 return forward_call(*args, **kwargs)
1564 try:
1565 result = None

Cell In[28], line 8, in QNetworkCNN.forward(self, state)
7 def forward(self, state):
----> 8 features = self.feature_extractor(state)
9 q_values = self.fc2(features) # Output Q-values
10 return q_values

File /opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py:1553, in Module._wrapped_call_impl(self, *args, **kwargs)
1551 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1552 else:
-> 1553 return self._call_impl(*args, **kwargs)

File /opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py:1562, in Module._call_impl(self, *args, **kwargs)
1557 # If we don't have any hooks, we want to skip the rest of the logic in
1558 # this function, and just call forward.
1559 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1560 or _global_backward_pre_hooks or _global_backward_hooks
1561 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1562 return forward_call(*args, **kwargs)
1564 try:
1565 result = None

Cell In[27], line 37, in FeatureExtractor.forward(self, x)
35 x = self.pool2(x)
36 x = self.dropout2(x)
---> 37 x = self.fc1(x) # Fully connected layer
38 return x

File /opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py:1553, in Module._wrapped_call_impl(self, *args, **kwargs)
1551 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1552 else:
-> 1553 return self._call_impl(*args, **kwargs)

File /opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py:1562, in Module._call_impl(self, *args, **kwargs)
1557 # If we don't have any hooks, we want to skip the rest of the logic in
1558 # this function, and just call forward.
1559 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1560 or _global_backward_pre_hooks or _global_backward_hooks
1561 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1562 return forward_call(*args, **kwargs)
1564 try:
1565 result = None

File /opt/conda/lib/python3.10/site-packages/torch/nn/modules/linear.py:117, in Linear.forward(self, input)
116 def forward(self, input: Tensor) -> Tensor:
-> 117 return F.linear(input, self.weight, self.bias)

RuntimeError: mat1 and mat2 shapes cannot be multiplied (155648x19 and 92416x64)

I receive this error: RuntimeError: mat1 and mat2 shapes cannot be multiplied (155648x19 and 92416x64)
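From the printed shapes ([1, 256, 19, 19] before flattening, [1, 92416] after), the likely culprit is that forward() never flattens the activation before self.fc1, even though _calculate_flattened_size does; nn.Linear then treats the trailing dimension of 19 as the feature size, which is where the 155648x19 matrix in the error comes from. A minimal sketch of an adjusted forward, assuming the rest of FeatureExtractor stays exactly as posted:

    # inside the posted FeatureExtractor; all layers unchanged
    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = self.pool1(x)
        x = self.dropout1(x)
        x = F.relu(self.conv2(x))
        x = self.pool2(x)
        x = self.dropout2(x)
        x = x.view(x.size(0), -1)  # flatten to (batch_size, 256*19*19) = (batch_size, 92416)
        x = self.fc1(x)            # now matches nn.Linear(92416, 64)
        return x

Using self.flattened_size instead of the hard-coded 92416 in nn.Linear would also keep the two values in sync if the input resolution ever changes.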