RuntimeError: mat1 and mat2 shapes cannot be multiplied (64x13056 and 153600x2048)

My act function is defined as

def act(self, state, add_noise=True):
    # state is converted to tensor
    state = torch.from_numpy(state).float().to(device)
    # action is initialized as a zero vector
    acts = np.zeros((self.number_agents, self.action_size))
    # set the actor network to evaluation mode
    self.actor_local.eval()

    with torch.no_grad():  # no gradient is calculated, no backpropagation
        if self.number_agents > 1:  # if there are multiple agents
            for agent in range(self.number_agents):  # for each agent in the environment
                acts[agent, :] = self.actor_local(state[agent, :]).cpu().data.numpy()
        else:  # if there is only one agent
            acts[0, :] = self.actor_local(state).cpu().data.numpy()

    # set the actor network to training mode
    self.actor_local.train()
    # noise is added to the action
    if add_noise:
        acts += self.noise.sample()
    # return clipped action as the output, value from -1 to 1
    #return acts
    return np.clip(acts, -1, 1)

My actor network is defined as

class Actor(nn.Module):

def __init__(self, state_size, action_size, seed):
    super(Actor, self).__init__()  # inherit from nn.Module
    self.seed = torch.manual_seed(seed)  # set the random seed for reproducibility
    self.fc1 = nn.Linear(state_size, 400)  # 400 is the number of nodes in the first layer
    self.fc2 = nn.Linear(400, 300)  # 300 is the number of nodes in the second layer
    self.fc3 = nn.Linear(300, action_size)  # action_size is the number of nodes in the output layer
    self.bn = nn.BatchNorm1d(400)  # batch normalization, 400 is the number of nodes in the first layer
    self.reset_parameters()

def reset_parameters(self):
    self.fc1.weight.data.uniform_(*hidden_init(self.fc1))
    self.fc2.weight.data.uniform_(*hidden_init(self.fc2))
    self.fc3.weight.data.uniform_(-3e-3, 3e-3)

def forward(self, state):  # forward propagation is used to calculate the output
    """Build an actor (policy) network that maps states -> actions."""
    x = F.relu(self.fc1(state))
    x = self.bn(x)
    x = F.relu(self.fc2(x))
    return torch.tanh(self.fc3(x))  # squash the actions to [-1, 1]

fails with the shape mismatch. Fix it by adapting the in_features of the linear layer as already described in this topic.

The input layer's in_features must match the feature dimension of your input data.
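As a minimal sketch using the numbers from the error message above (which are assumptions about your actual layer sizes): the flattened activation has 13056 features per sample while the layer was built for 153600, so either set in_features=13056 or change the flattening so the activation really has 153600 features.

import torch
import torch.nn as nn

x = torch.randn(64, 13056)          # batch of 64 flattened activations, as in the error
fc = nn.Linear(153600, 2048)        # mismatched in_features
# fc(x)                             # would raise: mat1 and mat2 shapes cannot be multiplied

fc_fixed = nn.Linear(13056, 2048)   # in_features matches x.shape[1]
out = fc_fixed(x)
print(out.shape)                    # torch.Size([64, 2048])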

Can you help me with my code? I get the same error.

!git clone https://github.com/philtabor/Youtube-Code-Repository.git
%cd Youtube-Code-Repository/ReinforcementLearning/PolicyGradient/PPO/torch

import os
import numpy as np
import torch as T
import torch.nn as nn
import torch.optim as optim
from torch.distributions import MultivariateNormal
import matplotlib.pyplot as plt

# Define PPO memory class
class PPOMemory:
    def __init__(self, batch_size):
        self.states = []
        self.probs = []
        self.vals = []
        self.actions = []
        self.rewards = []
        self.dones = []

        self.batch_size = batch_size

    def generate_batches(self):
        n_states = len(self.states)
        batch_start = np.arange(0, n_states, self.batch_size)
        indices = np.arange(n_states, dtype=np.int64)
        np.random.shuffle(indices)
        batches = [indices[i:i+self.batch_size] for i in batch_start]

        return np.array(self.states),\
                np.array(self.actions),\
                np.array(self.probs),\
                np.array(self.vals),\
                np.array(self.rewards),\
                np.array(self.dones),\
                batches

    def store_memory(self, state, action, probs, vals, reward, done):
        self.states.append(state)
        self.actions.append(action)
        self.probs.append(probs)
        self.vals.append(vals)
        self.rewards.append(reward)
        self.dones.append(done)

    def clear_memory(self):
        self.states = []
        self.probs = []
        self.actions = []
        self.rewards = []
        self.dones = []
        self.vals = []

# Define the actor network
class ActorNetwork(nn.Module):
    def __init__(self, n_actions, input_dims, alpha, fc1_dims=256, fc2_dims=256, chkpt_dir='tmp/ppo'):
        super(ActorNetwork, self).__init__()

        self.checkpoint_file = os.path.join(chkpt_dir, 'actor_torch_ppo')
        self.actor = nn.Sequential(
            nn.Linear(input_dims[0], fc1_dims),
            nn.ReLU(),
            nn.Linear(fc1_dims, fc2_dims),
            nn.ReLU(),
            nn.Linear(fc2_dims, n_actions)
        )
        self.log_std = nn.Parameter(T.zeros(n_actions))

        self.optimizer = optim.Adam(self.parameters(), lr=alpha)
        self.device = T.device('cuda:0' if T.cuda.is_available() else 'cpu')
        self.to(self.device)

    def forward(self, state):
        print('state shape is:', state.shape)  # debug print
        mu = self.actor(state)
        mu = sanitize_tensor(mu)  # Sanitize output of actor network
        dist = MultivariateNormal(mu, T.diag_embed(self.log_std.exp().expand_as(mu)))
        return dist  ##change

    def save_checkpoint(self):
        os.makedirs(os.path.dirname(self.checkpoint_file), exist_ok=True)
        T.save(self.state_dict(), self.checkpoint_file)

    def load_checkpoint(self):
        self.load_state_dict(T.load(self.checkpoint_file))

# Define the critic network
class CriticNetwork(nn.Module):
    def __init__(self, input_dims, alpha, fc1_dims=256, fc2_dims=256, chkpt_dir='tmp/ppo'):
        super(CriticNetwork, self).__init__()

        self.checkpoint_file = os.path.join(chkpt_dir, 'critic_torch_ppo')
        self.critic = nn.Sequential(
            nn.Linear(input_dims[0], fc1_dims),
            nn.ReLU(),
            nn.Linear(fc1_dims, fc2_dims),
            nn.ReLU(),
            nn.Linear(fc2_dims, 1)
        )

        self.optimizer = optim.Adam(self.parameters(), lr=alpha)
        self.device = T.device('cuda:0' if T.cuda.is_available() else 'cpu')
        self.to(self.device)

    def forward(self, state):
        value = self.critic(state)
        value = sanitize_tensor(value)
        return value

    def save_checkpoint(self):
        os.makedirs(os.path.dirname(self.checkpoint_file), exist_ok=True)
        T.save(self.state_dict(), self.checkpoint_file)

    def load_checkpoint(self):
        self.load_state_dict(T.load(self.checkpoint_file))

# Define the agent
class Agent:
    def __init__(self, n_actions, input_dims, gamma=0.99, alpha=0.0003, gae_lambda=0.95, policy_clip=0.2, batch_size=64, n_epochs=10):
        self.gamma = gamma
        self.policy_clip = policy_clip
        self.n_epochs = n_epochs
        self.gae_lambda = gae_lambda

        self.actor = ActorNetwork(n_actions, input_dims, alpha)
        self.critic = CriticNetwork(input_dims, alpha)
        self.memory = PPOMemory(batch_size)

    def remember(self, state, action, probs, vals, reward, done):
        self.memory.store_memory(state, action, probs, vals, reward, done)

    def save_models(self):
        print('... saving models ...')
        self.actor.save_checkpoint()
        self.critic.save_checkpoint()

    def load_models(self):
        print('... loading models ...')
        self.actor.load_checkpoint()
        self.critic.load_checkpoint()

    def choose_action(self, observation):
        # Keep the observation as (state_size,) and add a batch dimension
        state = T.tensor(observation, dtype=T.float).unsqueeze(0).to(self.actor.device)

        print(f"State shape: {state.shape}")  # Debugging print statement

        dist = self.actor(state)
        value = self.critic(state)

        action = dist.sample()
        action = action.cpu().detach().numpy().flatten()

        expected_size = 5 * (env.No_TX_UAVs + env.No_Jam_UAVs)
        if action.size != expected_size:
            raise ValueError(f'Action array has incorrect size. Expected {expected_size}, got {action.size}')

        probs = dist.log_prob(T.tensor(action).to(self.actor.device)).cpu().detach().numpy().flatten()
        value = T.squeeze(value).item()

        return action, probs, value

    def learn(self):
        for _ in range(self.n_epochs):
            state_arr, action_arr, old_prob_arr, vals_arr,\
            reward_arr, dones_arr, batches = \
                    self.memory.generate_batches()

            values = vals_arr
            advantage = np.zeros(len(reward_arr), dtype=np.float32)

            for t in range(len(reward_arr)-1):
                discount = 1
                a_t = 0
                for k in range(t, len(reward_arr)-1):
                    a_t += discount*(reward_arr[k] + self.gamma*values[k+1]*\
                            (1-int(dones_arr[k])) - values[k])
                    discount *= self.gamma*self.gae_lambda
                advantage[t] = a_t
            advantage = T.tensor(advantage).to(self.actor.device)

            values = T.tensor(values).to(self.actor.device)
            for batch in batches:
                states = T.tensor(state_arr[batch], dtype=T.float).to(self.actor.device)
                old_probs = T.tensor(old_prob_arr[batch]).to(self.actor.device)
                actions = T.tensor(action_arr[batch]).to(self.actor.device)

                dist = self.actor(states)
                critic_value = self.critic(states)

                critic_value = T.squeeze(critic_value)

                new_probs = dist.log_prob(actions)
                new_probs = sanitize_tensor(new_probs)
                prob_ratio = new_probs.exp() / old_probs.exp()
                weighted_probs = advantage[batch] * prob_ratio
                weighted_clipped_probs = T.clamp(prob_ratio, 1-self.policy_clip,
                        1+self.policy_clip)*advantage[batch]
                actor_loss = -T.min(weighted_probs, weighted_clipped_probs).mean()

                returns = advantage[batch] + values[batch]
                critic_loss = (returns-critic_value)**2
                critic_loss = critic_loss.mean()

                total_loss = actor_loss + 0.5*critic_loss
                self.actor.optimizer.zero_grad()
                self.critic.optimizer.zero_grad()
                total_loss.backward()
                self.actor.optimizer.step()
                self.critic.optimizer.step()

        self.memory.clear_memory()

# Plotting function
def plot_learning_curve(x, scores, figure_file):
    running_avg = np.zeros(len(scores))
    for i in range(len(running_avg)):
        running_avg[i] = np.mean(scores[max(0, i-100):(i+1)])
    plt.plot(x, running_avg)
    plt.title('Running average of previous 100 scores')
    plt.savefig(figure_file)

# Helper function to sanitize values
def sanitize_input(value, replacement=0.0):
    if np.isnan(value).any() or np.isinf(value).any():
        return replacement
    return value

# Helper function to sanitize tensor values
def sanitize_tensor(tensor, replacement=0.0):
    tensor = T.where(T.isnan(tensor), T.tensor(replacement, dtype=tensor.dtype).to(tensor.device), tensor)
    tensor = T.where(T.isinf(tensor), T.tensor(replacement, dtype=tensor.dtype).to(tensor.device), tensor)
    return tensor

# Main function with debugging and sanitization
if __name__ == '__main__':
    import time

    seed = 100
    np.random.seed(seed)
    env = WirelessEnvironment(seed=seed, No_TX_UAVs=10, No_Jam_UAVs=2, No_Eav=1)
    N = 20
    state_size = 3 * env.No_TX_UAVs + 2 * env.No_Jam_UAVs + 1
    action_size = 5 * (env.No_TX_UAVs + env.No_Jam_UAVs)
    batch_size = 5
    n_epochs = 4
    alpha = 0.0003
    agent = Agent(n_actions=action_size, batch_size=batch_size,
                  alpha=alpha, n_epochs=n_epochs,
                  input_dims=[state_size])
    n_games = 300

    if not os.path.exists('plots'):
        os.makedirs('plots')

    figure_file = 'plots/cartpole.png'

    best_score = -1000000
    score_history = []

    learn_iters = 0
    avg_score = 0
    n_steps = 0

    old_TX_UAV_locations = np.copy(env.TX_UAVs_locations)

    #start_time = time.time()

    for i in range(n_games):
        print(f"Starting episode {i}")
        observation = env.reset_state()
        observation = sanitize_input(observation)
        print(f"Initial observation: {observation}")
        done = False
        score = 0
        step_count = 0
        while not done:

            print(f" Episode {i}, Step {step_count}")
            action, prob, val = agent.choose_action(observation)
            action = sanitize_input(action)
            prob = sanitize_input(prob)
            val = sanitize_input(val)
            print(f" Action: {action}, Prob: {prob}, Val: {val}")

            old_TX_UAV_locations = np.copy(env.TX_UAVs_locations)

            observation_, reward, done = env.DDPG_step(action)
            observation_ = sanitize_input(observation_)
            reward = sanitize_input(reward)

            print(f" Observation_: {observation_}, Reward: {reward}, Done: {done}")

            if np.isnan(reward) or np.isnan(observation_).any():
                print("NaN detected in reward or next observation. Skipping this step.")
                continue

            n_steps += 1
            score += reward
            agent.remember(observation, action, prob, val, reward, done)
            if n_steps % N == 0:
                agent.learn()
                learn_iters += 1
            observation = observation_
            step_count += 1
        score_history.append(score)
        avg_score = np.mean(score_history[-100:])

        if avg_score > best_score:
            best_score = avg_score
            agent.save_models()

        print('episode', i, 'score %.1f' % score, 'avg score %.1f' % avg_score, 'time_steps', n_steps, 'learning_steps', learn_iters)

    #end_time = time.time()
    #elapsed_time = end_time - start_time

    #print(f"Time taken for {n_games} games: {elapsed_time:.2f} seconds")

    x = [i + 1 for i in range(len(score_history))]
    plot_learning_curve(x, score_history, figure_file)

Hello sir. I have the class PatchMerging3D(nn.Module) shown below:
class PatchMerging3D(nn.Module): # class PatchMerging3D inherits from nn.Module
“”" 3D Patch Merging Layer

Args:
    input_dim (int): Number of input channels.
    output_dim (int): Number of output channels after reduction.
"""

def __init__(self, input_dim, output_dim, norm_layer=nn.LayerNorm):  # Initializes the PatchMerging3D class with input_dim, output_dim, and an optional norm_layer
    super().__init__()  # Calls the __init__ method of the parent class
    self.reduction = nn.Linear(8 * input_dim, output_dim, bias=False)  # Defines a linear layer to reduce the dimensionality of the input
    self.norm = norm_layer(8 * input_dim)  # Defines a normalization layer

def forward(self, x):  # Defines the forward pass of the network
    # If x is a tuple, extract the first element
    if isinstance(x, tuple):  # Checks if x is a tuple
        x = x[0]  # Extracts the first element of the tuple

    # Ensure x is shaped correctly
    x = x.transpose(1, 4)  # Transposes the input tensor from (B, C, D, H, W) to (B, D, H, W, C)
    B, D, H, W, C = x.shape  # Gets the shape of the input tensor

    # Padding
    pad_input = (H % 2 == 1) or (W % 2 == 1) or (D % 2 == 1)  # Checks if padding is needed
    if pad_input:  # If padding is needed
        x = F.pad(x, (0, 0, 0, D % 2, 0, W % 2, 0, H % 2))  # Pads the input tensor if necessary

    # Perform patch merging
    x0 = x[:, 0::2, 0::2, 0::2, :]  # Selects patches
    x1 = x[:, 0::2, 0::2, 1::2, :]  # Selects patches
    x2 = x[:, 0::2, 1::2, 0::2, :]  # Selects patches
    x3 = x[:, 0::2, 1::2, 1::2, :]  # Selects patches
    x4 = x[:, 1::2, 0::2, 0::2, :]  # Selects patches
    x5 = x[:, 1::2, 0::2, 1::2, :]  # Selects patches
    x6 = x[:, 1::2, 1::2, 0::2, :]  # Selects patches
    x7 = x[:, 1::2, 1::2, 1::2, :]  # Selects patches
    x = torch.cat([x0, x1, x2, x3, x4, x5, x6, x7], -1)  # Concatenates the patches along the last dimension
    print(f"x0 shape: {x0.shape}")
    print(f"x1 shape: {x1.shape}")
    print(f"x2 shape: {x2.shape}")
    print(f"x3 shape: {x3.shape}")
    print(f"x4 shape: {x4.shape}")
    print(f"x5 shape: {x5.shape}")
    print(f"x6 shape: {x6.shape}")
    print(f"x7 shape: {x7.shape}")
    # Flatten x to apply LayerNorm
    x = x.view(B, D // 2, H // 2, W // 2, -1)  # Reshape x to (B, D/2, H/2, W/2, 8 * C)
    print(f"x.view shape: {x.shape}")
    # Apply LayerNorm dynamically
    norm_layer = nn.LayerNorm(x.size()[1:]).to(x.device)  # Dynamically create LayerNorm with correct normalized shape
    x = norm_layer(x)  # Applies LayerNorm
    # Apply linear reduction
    print(f"x shape before reduction: {x.shape}")
    x = self.reduction(x)  # Applies the linear reduction

    # Transpose x back to its original shape
    x = x.transpose(1, 4)  # Transposes the tensor back to (B, C, D/2, H/2, W/2)

    return x  # Returns the output tensor

And after running it, I get this error:
x0 shape: torch.Size([1, 8, 8, 8, 64])
x1 shape: torch.Size([1, 8, 8, 8, 64])
x2 shape: torch.Size([1, 8, 8, 8, 64])
x3 shape: torch.Size([1, 8, 8, 8, 64])
x4 shape: torch.Size([1, 8, 8, 8, 64])
x5 shape: torch.Size([1, 8, 8, 8, 64])
x6 shape: torch.Size([1, 8, 8, 8, 64])
x7 shape: torch.Size([1, 8, 8, 8, 64])
x.view shape: torch.Size([1, 8, 8, 8, 512])
x shape before reduction: torch.Size([1, 8, 8, 8, 512])
Traceback (most recent call last):
File “train.py”, line 166, in
trainer.fit(net, datamodule=data_module)
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\pytorch_lightning\trainer\trainer.py”, line 771, in fit
self._call_and_handle_interrupt(
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\pytorch_lightning\trainer\trainer.py”, line 724, in _call_and_handle_interrupt
return trainer_fn(*args, **kwargs)
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\pytorch_lightning\trainer\trainer.py”, line 812, in _fit_impl
results = self._run(model, ckpt_path=self.ckpt_path)
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\pytorch_lightning\trainer\trainer.py”, line 1222, in _run
self._call_callback_hooks(“on_fit_start”)
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\pytorch_lightning\trainer\trainer.py”, line 1637, in _call_callback_hooks
fn(self, self.lightning_module, *args, **kwargs)
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\pytorch_lightning\callbacks\model_summary.py”, line 56, in on_fit_start
model_summary = summarize(pl_module, max_depth=self._max_depth)
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\pytorch_lightning\utilities\model_summary.py”, line 427, in summarize
return ModelSummary(lightning_module, max_depth=max_depth)
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\pytorch_lightning\utilities\model_summary.py”, line 187, in __init__
self._layer_summary = self.summarize()
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\pytorch_lightning\utilities\model_summary.py”, line 244, in summarize
self._forward_example_input()
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\pytorch_lightning\utilities\model_summary.py”, line 274, in _forward_example_input
model(input_)
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\torch\nn\modules\module.py”, line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\torch\nn\modules\module.py”, line 1541, in _call_impl
return forward_call(*args, **kwargs)
File “D:\3_3D-EffiViTCaps-main\module\effiViTcaps.py”, line 190, in forward
conv_3_1 = self.patchMergingblock_2(conv_2_1)
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\torch\nn\modules\module.py”, line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\torch\nn\modules\module.py”, line 1582, in _call_impl
result = forward_call(*args, **kwargs)
File “D:\3_3D-EffiViTCaps-main\main_block\UCTransNet.py”, line 190, in forward
x = self.reduction(x) # Applies the linear reduction
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\torch\nn\modules\module.py”, line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\torch\nn\modules\module.py”, line 1541, in _call_impl
return forward_call(*args, **kwargs)
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\torch\nn\modules\linear.py”, line 116, in forward
return F.linear(input, self.weight, self.bias)
RuntimeError: mat1 and mat2 shapes cannot be multiplied (512x512 and 1024x128)

I have been trying to fix it with reshape, flatten, and view for two days, but I cannot fix it. Could you possibly help and guide me, please? Thanks in advance, sir. Good day!

The issue is raised in self.reduction, since 1024 input features are expected while the incoming activation has only 512 features.
Minimal code raising the same error:

x = torch.randn(1, 8, 8, 8, 512)
reduction = nn.Linear(1024, 128, bias=False)

out = reduction(x)

Set in_features=512 for self.reduction and this layer shouldn’t fail anymore.
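
For completeness, a minimal sketch of the corrected layer (assuming the merged activation really has 512 features, i.e. input_dim=64 so 8 * input_dim = 512):

x = torch.randn(1, 8, 8, 8, 512)
reduction = nn.Linear(512, 128, bias=False)   # in_features matches the last dimension of x

out = reduction(x)
print(out.shape)   # torch.Size([1, 8, 8, 8, 128])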


Hello again sir. Good afternoon. Thanks a lot for your help. But after updating my patch merging snippet, I received a new error:
RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cpu and cuda:0! (when checking argument for argument mat2 in method wrapper_CUDA_mm).
Error:
Loading dataset: 100%|▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒| 208/208 [00:01<00:00, 198.66it/s]
Loading dataset: 100%|▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒| 52/52 [00:00<00:00, 202.36it/s]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
x0 shape: torch.Size([1, 16, 16, 16, 64])
x1 shape: torch.Size([1, 16, 16, 16, 64])
x2 shape: torch.Size([1, 16, 16, 16, 64])
x3 shape: torch.Size([1, 16, 16, 16, 64])
x4 shape: torch.Size([1, 16, 16, 16, 64])
x5 shape: torch.Size([1, 16, 16, 16, 64])
x6 shape: torch.Size([1, 16, 16, 16, 64])
x7 shape: torch.Size([1, 16, 16, 16, 64])
x.view shape: torch.Size([1, 16, 16, 16, 512])
x shape before reduction: torch.Size([1, 16, 16, 16, 512])
x shape after_reduction: torch.Size([1, 8, 8, 8, 512])
x shape_out: torch.Size([1, 8, 8, 8, 512])
Traceback (most recent call last):
File “train.py”, line 175, in
trainer.fit(net, datamodule=data_module)
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\pytorch_lightning\trainer\trainer.py”, line 771, in fit
self._call_and_handle_interrupt(
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\pytorch_lightning\trainer\trainer.py”, line 724, in _call_and_handle_interrupt
return trainer_fn(*args, **kwargs)
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\pytorch_lightning\trainer\trainer.py”, line 812, in _fit_impl
results = self._run(model, ckpt_path=self.ckpt_path)
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\pytorch_lightning\trainer\trainer.py”, line 1222, in _run
self._call_callback_hooks(“on_fit_start”)
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\pytorch_lightning\trainer\trainer.py”, line 1637, in _call_callback_hooks
fn(self, self.lightning_module, *args, **kwargs)
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\pytorch_lightning\callbacks\model_summary.py”, line 56, in on_fit_start
model_summary = summarize(pl_module, max_depth=self._max_depth)
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\pytorch_lightning\utilities\model_summary.py”, line 427, in summarize
return ModelSummary(lightning_module, max_depth=max_depth)
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\pytorch_lightning\utilities\model_summary.py”, line 187, in __init__
self._layer_summary = self.summarize()
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\pytorch_lightning\utilities\model_summary.py”, line 244, in summarize
self._forward_example_input()
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\pytorch_lightning\utilities\model_summary.py”, line 274, in _forward_example_input
model(input_)
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\torch\nn\modules\module.py”, line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\torch\nn\modules\module.py”, line 1541, in _call_impl
return forward_call(*args, **kwargs)
File “D:\3_3D-EffiViTCaps-main\module\effiViTcaps.py”, line 186, in forward
conv_2_1 = self.patchMergingblock_1(conv_1_1)
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\torch\nn\modules\module.py”, line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\torch\nn\modules\module.py”, line 1582, in _call_impl
result = forward_call(*args, **kwargs)
File “D:\3_3D-EffiViTCaps-main\main_block\UCTransNet.py”, line 198, in forward
x = self.reduction(out) # Applies the linear reduction
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\torch\nn\modules\module.py”, line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\torch\nn\modules\module.py”, line 1541, in _call_impl
return forward_call(*args, **kwargs)
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\torch\nn\modules\linear.py”, line 116, in forward
return F.linear(input, self.weight, self.bias)
RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cpu and cuda:0! (when checking argument for argument mat2 in method wrapper_CUDA_mm)

Updated patch merging class code with your modification:

class PatchMerging3D(nn.Module): # class PatchMerging3D inherits from nn.Module
“”" 3D Patch Merging Layer

Args:
    input_dim (int): Number of input channels.
    output_dim (int): Number of output channels after reduction.
"""

def __init__(self, input_dim, output_dim,
             norm_layer=nn.LayerNorm):  # Initializes the PatchMerging3D class with input_dim, output_dim, and an optional norm_layer
    super().__init__()  # Calls the __init__ method of the parent class
    self.reduction = nn.Linear(8 * input_dim, output_dim,
                               bias=False)  # Defines a linear layer to reduce the dimensionality of the input
    self.norm = norm_layer(8 * input_dim)  # Defines a normalization layer

def forward(self, x):  # Defines the forward pass of the network
    # If x is a tuple, extract the first element
    if isinstance(x, tuple):  # Checks if x is a tuple
        x = x[0]  # Extracts the first element of the tuple

    # Ensure x is shaped correctly
    x = x.transpose(1, 4)  # Transposes the input tensor from (B, C, D, H, W) to (B, D, H, W, C)
    B, D, H, W, C = x.shape  # Gets the shape of the input tensor

    # Padding
    pad_input = (H % 2 == 1) or (W % 2 == 1) or (D % 2 == 1)  # Checks if padding is needed
    if pad_input:  # If padding is needed
        x = F.pad(x, (0, 0, 0, D % 2, 0, W % 2, 0, H % 2))  # Pads the input tensor if necessary

    # Perform patch merging
    x0 = x[:, 0::2, 0::2, 0::2, :]  # Selects patches
    x1 = x[:, 0::2, 0::2, 1::2, :]  # Selects patches
    x2 = x[:, 0::2, 1::2, 0::2, :]  # Selects patches
    x3 = x[:, 0::2, 1::2, 1::2, :]  # Selects patches
    x4 = x[:, 1::2, 0::2, 0::2, :]  # Selects patches
    x5 = x[:, 1::2, 0::2, 1::2, :]  # Selects patches
    x6 = x[:, 1::2, 1::2, 0::2, :]  # Selects patches
    x7 = x[:, 1::2, 1::2, 1::2, :]  # Selects patches
    x = torch.cat([x0, x1, x2, x3, x4, x5, x6, x7], -1)  # Concatenates the patches along the last dimension
    print(f"x0 shape: {x0.shape}")
    print(f"x1 shape: {x1.shape}")
    print(f"x2 shape: {x2.shape}")
    print(f"x3 shape: {x3.shape}")
    print(f"x4 shape: {x4.shape}")
    print(f"x5 shape: {x5.shape}")
    print(f"x6 shape: {x6.shape}")
    print(f"x7 shape: {x7.shape}")
    # Flatten x to apply LayerNorm
    x = x.view(B, D // 2, H // 2, W // 2, -1)  # Reshape x to (B, D/2, H/2, W/2, 8 * C)
    print(f"x.view shape: {x.shape}")
    # Apply LayerNorm dynamically
    norm_layer = nn.LayerNorm(x.size()[1:]).to(
        x.device)  # Dynamically create LayerNorm with correct normalized shape
    x = norm_layer(x)  # Applies LayerNorm
    # Apply linear reduction
    print(f"x shape before reduction: {x.shape}")
    x = torch.randn(1, 8, 8, 8, 512)
    reduction = nn.Linear(512, 128, bias=False)
    print(f"x shape after_reduction: {x.shape}")
    out = reduction(x)
    print(f"x shape_out: {x.shape}")
    x = self.reduction(out)  # Applies the linear reduction

    # Transpose x back to its original shape
    x = x.transpose(1, 4)  # Transposes the tensor back to (B, C, D/2, H/2, W/2)

    return x  # Returns the output tensor

Thanks in advance sir!

Hello sir, I am sorry for the confusion. I just realized that my PatchMerging class successfully processes the input once within the transformer and works well, as I have printed the shapes for verification. However, when my training process reaches the PatchMerging class for the second time, I encounter the following error:


Error:
Traceback (most recent call last):
File “train.py”, line 175, in
trainer.fit(net, datamodule=data_module)
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\pytorch_lightning\trainer\trainer.py”, line 771, in fit
self._call_and_handle_interrupt(
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\pytorch_lightning\trainer\trainer.py”, line 724, in _call_and_handle_interrupt
return trainer_fn(*args, **kwargs)
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\pytorch_lightning\trainer\trainer.py”, line 812, in _fit_impl
results = self._run(model, ckpt_path=self.ckpt_path)
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\pytorch_lightning\trainer\trainer.py”, line 1222, in _run
self._call_callback_hooks(“on_fit_start”)
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\pytorch_lightning\trainer\trainer.py”, line 1637, in _call_callback_hooks
fn(self, self.lightning_module, *args, **kwargs)
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\pytorch_lightning\callbacks\model_summary.py”, line 56, in on_fit_start
model_summary = summarize(pl_module, max_depth=self._max_depth)
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\pytorch_lightning\utilities\model_summary.py”, line 427, in summarize
return ModelSummary(lightning_module, max_depth=max_depth)
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\pytorch_lightning\utilities\model_summary.py”, line 187, in __init__
self._layer_summary = self.summarize()
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\pytorch_lightning\utilities\model_summary.py”, line 244, in summarize
self._forward_example_input()
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\pytorch_lightning\utilities\model_summary.py”, line 274, in _forward_example_input
model(input_)
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\torch\nn\modules\module.py”, line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\torch\nn\modules\module.py”, line 1541, in _call_impl
return forward_call(*args, **kwargs)
File “D:\3_3D-EffiViTCaps-main\module\effiViTcaps.py”, line 190, in forward
conv_3_1 = self.patchMergingblock_2(conv_2_1)
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\torch\nn\modules\module.py”, line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\torch\nn\modules\module.py”, line 1582, in _call_impl
result = forward_call(*args, **kwargs)
File “D:\3_3D-EffiViTCaps-main\main_block\UCTransNet.py”, line 190, in forward
x = self.reduction(x) # Applies the linear reduction
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\torch\nn\modules\module.py”, line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\torch\nn\modules\module.py”, line 1541, in _call_impl
return forward_call(*args, **kwargs)
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\torch\nn\modules\linear.py”, line 116, in forward
return F.linear(input, self.weight, self.bias)
RuntimeError: mat1 and mat2 shapes cannot be multiplied (512x512 and 1024x128)


Shapes printed out:
x0 shape: torch.Size([1, 16, 16, 16, 64])
x1 shape: torch.Size([1, 16, 16, 16, 64])
x2 shape: torch.Size([1, 16, 16, 16, 64])
x3 shape: torch.Size([1, 16, 16, 16, 64])
x4 shape: torch.Size([1, 16, 16, 16, 64])
x5 shape: torch.Size([1, 16, 16, 16, 64])
x6 shape: torch.Size([1, 16, 16, 16, 64])
x7 shape: torch.Size([1, 16, 16, 16, 64])
x.view shape: torch.Size([1, 16, 16, 16, 512])
x shape before reduction: torch.Size([1, 16, 16, 16, 512])
x shape transpose: torch.Size([1, 128, 16, 16, 16])
x1 shape: torch.Size([1, 64, 16, 16])
en1 shape: torch.Size([1, 64, 16, 16, 16])
x1 shape: torch.Size([1, 64, 16, 16, 16])
en1 shape: torch.Size([1, 64, 16, 16, 16])
x2 shape: torch.Size([1, 128, 8, 8])
en2 shape: torch.Size([1, 128, 8, 8, 8])
x2 shape: torch.Size([1, 128, 8, 8, 8])
en2 shape: torch.Size([1, 128, 8, 8, 8])
x3 shape: torch.Size([1, 256, 4, 4])
en3 shape: torch.Size([1, 256, 4, 4, 4])
x3 shape: torch.Size([1, 256, 4, 4, 4])
en3 shape: torch.Size([1, 256, 4, 4, 4])
x4 shape: torch.Size([1, 512, 2, 2])
en4 shape: torch.Size([1, 512, 2, 2, 2])
x4 shape: torch.Size([1, 512, 2, 2, 2])
en4 shape: torch.Size([1, 512, 2, 2, 2])
x0 shape: torch.Size([1, 8, 8, 8, 64])
x1 shape: torch.Size([1, 8, 8, 8, 64])
x2 shape: torch.Size([1, 8, 8, 8, 64])
x3 shape: torch.Size([1, 8, 8, 8, 64])
x4 shape: torch.Size([1, 8, 8, 8, 64])
x5 shape: torch.Size([1, 8, 8, 8, 64])
x6 shape: torch.Size([1, 8, 8, 8, 64])
x7 shape: torch.Size([1, 8, 8, 8, 64])
x.view shape: torch.Size([1, 8, 8, 8, 512])
x shape before reduction: torch.Size([1, 8, 8, 8, 512])


My class patch_merging code:
class PatchMerging3D(nn.Module): # class PatchMerging3D inherits from nn.Module
“”" 3D Patch Merging Layer

Args:
    input_dim (int): Number of input channels.
    output_dim (int): Number of output channels after reduction.
"""

def __init__(self, input_dim, output_dim, norm_layer=nn.LayerNorm):  # Initializes the PatchMerging3D class with input_dim, output_dim, and an optional norm_layer
    super().__init__()  # Calls the __init__ method of the parent class
    self.reduction = nn.Linear(8 * input_dim, output_dim, bias=False)  # Defines a linear layer to reduce the dimensionality of the input
    self.norm = norm_layer(8 * input_dim)  # Defines a normalization layer

def forward(self, x):  # Defines the forward pass of the network
    # If x is a tuple, extract the first element
    if isinstance(x, tuple):  # Checks if x is a tuple
        x = x[0]  # Extracts the first element of the tuple

    # Ensure x is shaped correctly
    x = x.transpose(1, 4)  # Transposes the input tensor from (B, C, D, H, W) to (B, D, H, W, C)
    B, D, H, W, C = x.shape  # Gets the shape of the input tensor

    # Padding
    pad_input = (H % 2 == 1) or (W % 2 == 1) or (D % 2 == 1)  # Checks if padding is needed
    if pad_input:  # If padding is needed
        x = F.pad(x, (0, 0, 0, D % 2, 0, W % 2, 0, H % 2))  # Pads the input tensor if necessary

    # Perform patch merging
    x0 = x[:, 0::2, 0::2, 0::2, :]  # Selects patches
    x1 = x[:, 0::2, 0::2, 1::2, :]  # Selects patches
    x2 = x[:, 0::2, 1::2, 0::2, :]  # Selects patches
    x3 = x[:, 0::2, 1::2, 1::2, :]  # Selects patches
    x4 = x[:, 1::2, 0::2, 0::2, :]  # Selects patches
    x5 = x[:, 1::2, 0::2, 1::2, :]  # Selects patches
    x6 = x[:, 1::2, 1::2, 0::2, :]  # Selects patches
    x7 = x[:, 1::2, 1::2, 1::2, :]  # Selects patches
    x = torch.cat([x0, x1, x2, x3, x4, x5, x6, x7], -1)  # Concatenates the patches along the last dimension
    print(f"x0 shape: {x0.shape}")
    print(f"x1 shape: {x1.shape}")
    print(f"x2 shape: {x2.shape}")
    print(f"x3 shape: {x3.shape}")
    print(f"x4 shape: {x4.shape}")
    print(f"x5 shape: {x5.shape}")
    print(f"x6 shape: {x6.shape}")
    print(f"x7 shape: {x7.shape}")
    # Flatten x to apply LayerNorm
    x = x.view(B, D // 2, H // 2, W // 2, -1)  # Reshape x to (B, D/2, H/2, W/2, 8 * C)
    print(f"x.view shape: {x.shape}")
    # Apply LayerNorm dynamically
    norm_layer = nn.LayerNorm(x.size()[1:]).to(x.device)  # Dynamically create LayerNorm with correct normalized shape
    x = norm_layer(x)  # Applies LayerNorm
    # Apply linear reduction
    print(f"x shape before reduction: {x.shape}")
    x = self.reduction(x)  # Applies the linear reduction

    # Transpose x back to its original shape
    x = x.transpose(1, 4)  # Transposes the tensor back to (B, C, D/2, H/2, W/2)
    print(f"x shape transpose: {x.shape}")
    return x  # Returns the output tensor

First Pass:
x0, x1, …, x7 shapes: [1, 16, 16, 16, 64]
x.view shape: [1, 16, 16, 16, 512]
x shape before reduction: [1, 16, 16, 16, 512]
x shape transpose: [1, 128, 16, 16, 16]

Second Pass:
x0, x1, …, x7 shapes: [1, 8, 8, 8, 64]
x.view shape: [1, 8, 8, 8, 512]
x shape before reduction: [1, 8, 8, 8, 512]
Error occurs here before the transpose.


Analysis:

The first pass through the PatchMerging3D reduces the spatial dimensions by half, and the number of channels changes as expected. However, the second pass seems to be encountering an issue because the shape transformations lead to an unexpected size for the linear layer.


Problem:
The shape mismatch suggests that the self.reduction layer of the second merging block is not sized for the activation it actually receives. That block was constructed with input_dim=128, so its nn.Linear expects 8 * 128 = 1024 input features, but the activation reaching it in the second pass still has only 64 channels, i.e. 8 * 64 = 512 features after merging.
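
A minimal sketch reproducing that mismatch (assuming patchMergingblock_2 was built with input_dim=128 while the incoming activation still has 64 channels):

import torch
import torch.nn as nn

block_2_reduction = nn.Linear(8 * 128, 128, bias=False)   # expects 1024 input features
x = torch.randn(1, 8, 8, 8, 8 * 64)                       # but the activation only has 512
block_2_reduction(x)   # RuntimeError: mat1 and mat2 shapes cannot be multiplied (512x512 and 1024x128)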


Solution:
To ensure the reduction layer is correctly updated for each pass, you need to manage the input dimensions dynamically. One way to do this is to recreate the reduction layer dynamically within the forward method based on the current input shape.


I found this analysis, problem description, and solution on the internet, but I couldn't dynamically recreate the reduction layer within the forward method based on the current input shape. Thanks in advance, sir!
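
For reference, one possible sketch of the "recreate the reduction layer dynamically" idea described above. The class name and the use of nn.LazyLinear are assumptions for illustration, not the original repo's code; LazyLinear infers in_features from the first batch it sees, so 8 * input_dim no longer has to be hard-coded:

import torch
import torch.nn as nn

class PatchMerging3DLazy(nn.Module):
    """Sketch: patch merging whose reduction layer infers in_features at runtime."""

    def __init__(self, output_dim):
        super().__init__()
        # LazyLinear materializes its weight on the first forward pass
        self.reduction = nn.LazyLinear(output_dim, bias=False)

    def forward(self, x):  # x: (B, C, D, H, W)
        x = x.transpose(1, 4)  # move channels to the last dimension, as in the original code
        # (padding for odd spatial sizes omitted for brevity)
        parts = [x[:, d::2, h::2, w::2, :]
                 for d in (0, 1) for h in (0, 1) for w in (0, 1)]
        x = torch.cat(parts, dim=-1)      # eight neighbours concatenated -> 8 * C channels
        x = self.reduction(x)             # in_features inferred from the actual channel count
        return x.transpose(1, 4)          # channels back to dim 1

Note that a lazy layer's weight only exists after the first forward pass, so the optimizer should be created after a dummy forward; alternatively, keep the plain nn.Linear and simply make sure each block's input_dim matches the channel count that actually reaches it.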

Hello Sir,

I apologize for the misunderstanding regarding the patch merging update yesterday. I have made the necessary modifications today, and it is now working perfectly. I am not a professional in Python, so I truly appreciate your assistance. Thank you very much for your help.

Best regards,

Of course, and no need to apologize! :)
Based on "it is now working perfectly" I assume all issues are gone, or do you still see some errors?

Hi, can someone please help me out?

File 1: MODEL.py


import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim  # Add this import
import os

class Linear_QNet(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(Linear_QNet, self).__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.linear2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        x = F.relu(self.linear1(x))
        x = self.linear2(x)
        return x

    def save(self, file_name='model.pth'):
        model_folder_path = './model'
        if not os.path.exists(model_folder_path):
            os.makedirs(model_folder_path)
        file_name = os.path.join(model_folder_path, file_name)
        torch.save(self.state_dict(), file_name)

    def load(self, file_name='model.pth'):
        model_folder_path = './model'
        file_name = os.path.join(model_folder_path, file_name)
        self.load_state_dict(torch.load(file_name))



class QTrainer:
    def __init__(self, model, lr, gamma):
        self.lr = lr
        self.gamma = gamma
        self.model = model
        self.optimizer = optim.Adam(model.parameters(), lr=self.lr)
        self.criterion = nn.MSELoss()

    def train_step(self, state, action, reward, next_state, done):
        state = torch.tensor(state, dtype=torch.float)
        next_state = torch.tensor(next_state, dtype=torch.float)
        action = torch.tensor(action, dtype=torch.long)
        reward = torch.tensor(reward, dtype=torch.float)
        # (n, x)

        if len(state.shape) == 1:
            # (1, x)
            state = torch.unsqueeze(state, 0)
            next_state = torch.unsqueeze(next_state, 0)
            action = torch.unsqueeze(action, 0)
            reward = torch.unsqueeze(reward, 0)
            done = (done, )

        # 1: predicted Q values with current state
        pred = self.model(state)

        target = pred.clone()
        for idx in range(len(done)):
            Q_new = reward[idx]
            if not done[idx]:
                Q_new = reward[idx] + self.gamma * torch.max(self.model(next_state[idx]))

            target[idx][torch.argmax(action[idx]).item()] = Q_new
    
        # 2: Q_new = r + y * max(next_predicted Q value) -> only do this if not done
        # pred.clone()
        # preds[argmax(action)] = Q_new
        self.optimizer.zero_grad()
        loss = self.criterion(target, pred)
        loss.backward()

        self.optimizer.step()

File 2: AGENT.py

import torch
import random
import numpy as np
from collections import deque
from game import SnakeGameAI, Direction, Point
from model import Linear_QNet, QTrainer
from helper import plot

MAX_MEMORY = 100_000
BATCH_SIZE = 1000
LR = 0.001

class Agent:

    def __init__(self):
        self.n_games = 0
        self.epsilon = 0  # randomness
        self.gamma = 0.9  # discount rate
        self.memory = deque(maxlen=MAX_MEMORY)  # popleft()
        self.model = Linear_QNet(17, 256, 3)
        self.trainer = QTrainer(self.model, lr=LR, gamma=self.gamma)

    def get_state(self, game):
        head = game.snake[0]
        point_l = Point(head.x - 20, head.y)
        point_r = Point(head.x + 20, head.y)
        point_u = Point(head.x, head.y - 20)
        point_d = Point(head.x, head.y + 20)

        point_l2 = Point(head.x - 40, head.y)
        point_r2 = Point(head.x + 40, head.y)
        point_u2 = Point(head.x, head.y - 40)
        point_d2 = Point(head.x, head.y + 40)

        dir_l = game.direction == Direction.LEFT
        dir_r = game.direction == Direction.RIGHT
        dir_u = game.direction == Direction.UP
        dir_d = game.direction == Direction.DOWN

        state = [
            # Danger straight
            (dir_r and game.is_collision(point_r)) or 
            (dir_l and game.is_collision(point_l)) or 
            (dir_u and game.is_collision(point_u)) or 
            (dir_d and game.is_collision(point_d)),

            # Danger right
            (dir_u and game.is_collision(point_r)) or 
            (dir_d and game.is_collision(point_l)) or 
            (dir_l and game.is_collision(point_u)) or 
            (dir_r and game.is_collision(point_d)),

            # Danger left
            (dir_d and game.is_collision(point_r)) or 
            (dir_u and game.is_collision(point_l)) or 
            (dir_r and game.is_collision(point_u)) or 
            (dir_l and game.is_collision(point_d)),

            # Danger two steps straight
            (dir_r and game.is_collision(point_r2)) or 
            (dir_l and game.is_collision(point_l2)) or 
            (dir_u and game.is_collision(point_u2)) or 
            (dir_d and game.is_collision(point_d2)),

            # Danger two steps right
            (dir_u and game.is_collision(point_r2)) or 
            (dir_d and game.is_collision(point_l2)) or 
            (dir_l and game.is_collision(point_u2)) or 
            (dir_r and game.is_collision(point_d2)),

            # Danger two steps left
            (dir_d and game.is_collision(point_r2)) or 
            (dir_u and game.is_collision(point_l2)) or 
            (dir_r and game.is_collision(point_u2)) or 
            (dir_l and game.is_collision(point_d2)),

            # Move direction
            dir_l,
            dir_r,
            dir_u,
            dir_d,

            # Food location
            game.food.x < head.x,  # food left
            game.food.x > head.x,  # food right
            game.food.y < head.y,  # food up
            game.food.y > head.y   # food down
        ]

        return np.array(state, dtype=int)

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))  # popleft if MAX_MEMORY is reached

    def train_long_memory(self):
        if len(self.memory) > BATCH_SIZE:
            mini_sample = random.sample(self.memory, BATCH_SIZE)  # list of tuples
        else:
            mini_sample = self.memory

        states, actions, rewards, next_states, dones = zip(*mini_sample)
        self.trainer.train_step(states, actions, rewards, next_states, dones)

    def train_short_memory(self, state, action, reward, next_state, done):
        self.trainer.train_step(state, action, reward, next_state, done)

    def get_action(self, state):
        self.epsilon = 80 - self.n_games
        final_move = [0, 0, 0]
        if random.randint(0, 200) < self.epsilon:
            move = random.randint(0, 2)
            final_move[move] = 1
        else:
            state0 = torch.tensor(state, dtype=torch.float)
            prediction = self.model(state0)
            move = torch.argmax(prediction).item()
            final_move[move] = 1

        return final_move


def train():
    plot_scores = []
    plot_mean_scores = []
    total_score = 0
    record = 0
    agent = Agent()
    game = SnakeGameAI()
    while True:
        # get old state
        state_old = agent.get_state(game)

        # get move
        final_move = agent.get_action(state_old)

        # perform move and get new state
        reward, done, score = game.play_step(final_move)
        state_new = agent.get_state(game)

        # train short memory
        agent.train_short_memory(state_old, final_move, reward, state_new, done)

        # remember
        agent.remember(state_old, final_move, reward, state_new, done)

        if done:
            # train long memory, plot result
            game.reset()
            agent.n_games += 1
            agent.train_long_memory()

            if score > record:
                record = score
                agent.model.save()

            print('Game', agent.n_games, 'Score', score, 'Record:', record)

            plot_scores.append(score)
            total_score += score
            mean_score = total_score / agent.n_games
            plot_mean_scores.append(mean_score)
            plot(plot_scores, plot_mean_scores)


if __name__ == '__main__':
    train()

File 3: GAME.py


import pygame
import random
from enum import Enum
from collections import namedtuple
import numpy as np

pygame.init()
font = pygame.font.Font('arial.ttf', 25)
#font = pygame.font.SysFont('arial', 25)

class Direction(Enum):
    RIGHT = 1
    LEFT = 2
    UP = 3
    DOWN = 4

Point = namedtuple('Point', 'x, y')

# rgb colors
WHITE = (255, 255, 255)
RED = (200,0,0)
GREEN1 = (0, 255, 0)
GREEN2 = (0, 200, 10)
BLACK = (0,0,0)

BLOCK_SIZE = 20
SPEED = 40

class SnakeGameAI:

    def __init__(self, w=640, h=480):
        self.w = w
        self.h = h
        # init display
        self.display = pygame.display.set_mode((self.w, self.h))
        pygame.display.set_caption('Snake')
        self.clock = pygame.time.Clock()
        self.reset()


    def reset(self):
        # init game state
        self.direction = Direction.RIGHT

        self.head = Point(self.w/2, self.h/2)
        self.snake = [self.head,
                      Point(self.head.x-BLOCK_SIZE, self.head.y),
                      Point(self.head.x-(2*BLOCK_SIZE), self.head.y)]

        self.score = 0
        self.food = None
        self._place_food()
        self.frame_iteration = 0


    def _place_food(self):
        x = random.randint(0, (self.w-BLOCK_SIZE )//BLOCK_SIZE )*BLOCK_SIZE
        y = random.randint(0, (self.h-BLOCK_SIZE )//BLOCK_SIZE )*BLOCK_SIZE
        self.food = Point(x, y)
        if self.food in self.snake:
            self._place_food()


    def play_step(self, action):
        self.frame_iteration += 1
        # 1. collect user input
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                quit()
        
        # 2. move
        self._move(action) # update the head
        self.snake.insert(0, self.head)
        
        # 3. check if game over
        reward = 0
        game_over = False
        if self.is_collision() or self.frame_iteration > 100*len(self.snake):
            game_over = True
            reward = -100 #used to be -10
            return reward, game_over, self.score
            
        # Penalty for getting trapped MADE BY ACE
        if self.is_collision() and not game_over:
            reward = -40
            return reward, game_over, self.score

        # 4. place new food or just move
        if self.head == self.food:
            self.score += 1
            reward = 10
            self._place_food()
        else:
            self.snake.pop()
        
        # 5. update ui and clock
        self._update_ui()
        self.clock.tick(SPEED)
        # 6. return game over and score
        return reward, game_over, self.score


    def is_collision(self, pt=None):
        if pt is None:
            pt = self.head
        # hits boundary
        if pt.x > self.w - BLOCK_SIZE or pt.x < 0 or pt.y > self.h - BLOCK_SIZE or pt.y < 0:
            return True
        # hits itself
        if pt in self.snake[1:]:
            return True

        return False


    def _update_ui(self):
        self.display.fill(BLACK)

        for pt in self.snake:
            pygame.draw.rect(self.display, GREEN1, pygame.Rect(pt.x, pt.y, BLOCK_SIZE, BLOCK_SIZE))
            pygame.draw.rect(self.display, GREEN2, pygame.Rect(pt.x+4, pt.y+4, 12, 12))

        pygame.draw.rect(self.display, RED, pygame.Rect(self.food.x, self.food.y, BLOCK_SIZE, BLOCK_SIZE))

        text = font.render("Score: " + str(self.score), True, WHITE)
        self.display.blit(text, [0, 0])
        pygame.display.flip()


    def _move(self, action):
        # [straight, right, left]

        clock_wise = [Direction.RIGHT, Direction.DOWN, Direction.LEFT, Direction.UP]
        idx = clock_wise.index(self.direction)

        if np.array_equal(action, [1, 0, 0]):
            new_dir = clock_wise[idx] # no change
        elif np.array_equal(action, [0, 1, 0]):
            next_idx = (idx + 1) % 4
            new_dir = clock_wise[next_idx] # right turn r -> d -> l -> u
        else: # [0, 0, 1]
            next_idx = (idx - 1) % 4
            new_dir = clock_wise[next_idx] # left turn r -> u -> l -> d

        self.direction = new_dir

        x = self.head.x
        y = self.head.y
        if self.direction == Direction.RIGHT:
            x += BLOCK_SIZE
        elif self.direction == Direction.LEFT:
            x -= BLOCK_SIZE
        elif self.direction == Direction.DOWN:
            y += BLOCK_SIZE
        elif self.direction == Direction.UP:
            y -= BLOCK_SIZE

        self.head = Point(x, y)

Error:
RuntimeError: mat1 and mat2 shapes cannot be multiplied (1x14 and 17x256)

I am making a Python snake AI that can foresee 2 steps into the future, so I updated the inputs from 11 to 17.

The error is raised in the first linear layer in Linear_QNet since the input contains 14 features while 17 are expected.
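
As a quick check (a sketch, assuming the 14-entry state list shown in get_state above is what you intend to keep), the network's input_size has to match the length of the state vector:

state = agent.get_state(game)
print(len(state))   # 14: 6 danger flags + 4 direction flags + 4 food flags

# Either size the network to match the state in Agent.__init__ ...
#     self.model = Linear_QNet(14, 256, 3)
# ...or add three more entries to the state list so it really has 17 features.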

Hello sir,

I have fixed that error with your help and suggestions. However, I still have many errors that I need to address and fix step by step. I am using a Transformer CCT on a 3D capsule network. With your help, we have successfully passed the first block.

My model is as follows:

Input image
First block: 3D patch merging + Transformer CCT
Second block: 3D patch merging + Transformer CCT
Bottleneck: two 3D capsules network blocks + Transformer CCT
First upsampling block: Transformer CCT + Deconvolution
Second upsampling block: Transformer CCT + Deconvolution
Reconstruction
I am still working on fixing the remaining errors. Thanks again for your help. I will never forget your kindness.


Hello Sir,

Good evening. I am encountering an issue with the following code:

Code:
def forward(self, x):
    # Contracting
    x = self.feature_extractor(x)
    # fe_0 = self.feature_extractor.conv1(x)
    # fe_1 = self.feature_extractor.conv2(fe_0)
    # x = self.feature_extractor.conv3(fe_1)

    conv_1_1 = x

    # conv_2_1 = self.encoder_convs[0](conv_1_1)
    conv_2_1 = self.patchMergingblock_1(conv_1_1)
    conv_2_1 = self.relu(conv_2_1)
    conv_2_1 = self.efficientViT3Dblock_encoder_2(conv_2_1)

    conv_3_1 = self.patchMergingblock_2(conv_2_1)
    # conv_3_1 = self.encoder_convs[1](conv_2_1)
    conv_3_1 = self.relu(conv_3_1)
    conv_3_1 = self.efficientViT3Dblock_encoder_3(conv_3_1)
    # Ensure conv_3_1 is a tensor before calling view
    if isinstance(conv_3_1, tuple):
        conv_3_1 = conv_3_1[0]

    print(f"Shape of conv_3_1 before reshaping: {conv_3_1.shape}")
    num_elements = conv_3_1.numel()
    print(f"Total number of elements in conv_3_1: {num_elements}")

    # Calculate the appropriate shape for reshaping
    # Maintain the correct total number of elements
    # For [1, 64, 8, 8, 8], it could be [1, 8, 8, 8, 8, 8] or any other shape with the same 32768 elements

    new_shape = (1, 8, 8, 8, 8, 8)

    print(f"New shape for reshaping: {new_shape}")

    conv_3_1_reshaped = conv_3_1.view(*new_shape)
    print(f"Shape of conv_3_1 after reshaping: {conv_3_1_reshaped.shape}")

    x = self.encoder_conv_caps[0](conv_3_1_reshaped.contiguous())
    # conv_cap_4_1 = x
    conv_cap_4_1 = self.encoder_conv_caps[1](x)

    shape = conv_cap_4_1.size()
    conv_cap_4_1 = conv_cap_4_1.view(shape[0], -1, shape[-3], shape[-2], shape[-1])
    conv_cap_4_1 = self.efficientViT3Dblock_bottleneck(conv_cap_4_1)

    # Expanding
    if self.connection == "skip":
        # ###########################################################################################
        # x = self.patchExpandingblock_3(conv_cap_4_1)
        # x = torch.cat((x, conv_3_1), dim=1)
        # x = self.efficientViT3Dblock_decoder_3(x)
        # x = self.relu(x)
        # x = self.patchExpandingblock_2(x)
        # x = torch.cat((x, conv_2_1), dim=1)
        # x = self.efficientViT3Dblock_decoder_2(x)
        # x = self.relu(x)
        # x = self.patchExpandingblock_1(x)
        # x = torch.cat((x, conv_1_1), dim=1)
        # ###########################################################################################
        x = self.decoder_conv[0](conv_cap_4_1)
        x = torch.cat((x, conv_3_1), dim=1)
        x = self.decoder_conv[1](x)
        #x = self.efficientViT3Dblock_decoder_3(x)
        x = self.decoder_conv[2](x)
        x = torch.cat((x, conv_2_1), dim=1)
        x = self.decoder_conv[3](x)
        x = self.efficientViT3Dblock_decoder_2(x)
        x = self.decoder_conv[4](x)
        x = torch.cat((x, conv_1_1), dim=1)

        # extend decover and skip connection
        # x = self.add_deconvs[0](x)
        # x = torch.cat((x, fe_1), dim=1)
        # x = self.add_deconvs[1](x)
        # x = torch.cat((x, fe_0), dim=1)

    logits = self.decoder_conv[5](x)

    return logits

def training_step(self, batch, batch_idx):
    images, labels = batch["image"], batch["label"]
    # Contracting
    x = self.feature_extractor(images)
    # fe_0 = self.feature_extractor.conv1(x)
    # fe_1 = self.feature_extractor.conv2(fe_0)
    # x = self.feature_extractor.conv3(fe_1)

    conv_1_1 = x

    # conv_2_1 = self.encoder_convs[0](conv_1_1)
    conv_2_1 = self.patchMergingblock_1(conv_1_1)
    conv_2_1 = self.relu(conv_2_1)
    conv_2_1 = self.efficientViT3Dblock_encoder_2(conv_2_1)

    conv_3_1 = self.patchMergingblock_2(conv_2_1)
    # conv_3_1 = self.encoder_convs[1](conv_2_1)
    conv_3_1 = self.relu(conv_3_1)
    conv_3_1 = self.efficientViT3Dblock_encoder_3(conv_3_1)
    # Ensure conv_3_1 is a tensor before calling view
    if isinstance(conv_3_1, tuple):
        conv_3_1 = conv_3_1[0]

    print(f"Shape of conv_3_1 before reshaping: {conv_3_1.shape}")
    num_elements = conv_3_1.numel()
    print(f"Total number of elements in conv_3_1: {num_elements}")

    # Calculate the appropriate shape for reshaping
    # Maintain the correct total number of elements
    # For [1, 64, 8, 8, 8], it should be [1, 8, 8, 8, 8, 8] or any valid shape with the same total number of elements

    new_shape = (1, 8, 8, 8, 8, 8)

    print(f"New shape for reshaping: {new_shape}")

    conv_3_1_reshaped = conv_3_1.view(*new_shape)
    print(f"Shape of conv_3_1 after reshaping: {conv_3_1_reshaped.shape}")

    conv_3_1_reshaped = conv_3_1.view(-1, 8, 16, conv_3_1.shape[-1], conv_3_1.shape[-1], conv_3_1.shape[-1])

    x = self.encoder_conv_caps[0](conv_3_1_reshaped.contiguous())
    # conv_cap_4_1 = x
    conv_cap_4_1 = self.encoder_conv_caps[1](x)

    shape = conv_cap_4_1.size()
    conv_cap_4_1 = conv_cap_4_1.view(shape[0], -1, shape[-3], shape[-2], shape[-1])
    conv_cap_4_1 = self.efficientViT3Dblock_bottleneck(conv_cap_4_1)

    # Downsampled predictions
    norm = torch.linalg.norm(conv_cap_4_1, dim=2)

    # Expanding
    if self.connection == "skip":
        # ###########################################################################################
        # x = self.patchExpandingblock_3(conv_cap_4_1)
        # x = torch.cat((x, conv_3_1), dim=1)
        # x = self.efficientViT3Dblock_decoder_3(x)
        # x = self.relu(x)
        # x = self.patchExpandingblock_2(x)
        # x = torch.cat((x, conv_2_1), dim=1)
        # x = self.efficientViT3Dblock_decoder_2(x)
        # x = self.relu(x)
        # x = self.patchExpandingblock_1(x)
        # x = torch.cat((x, conv_1_1), dim=1)
        # ###########################################################################################
        x = self.decoder_conv[0](conv_cap_4_1)
        x = torch.cat((x, conv_3_1), dim=1)
        x = self.decoder_conv[1](x)
        #x = self.efficientViT3Dblock_decoder_3(x)
        x = self.decoder_conv[2](x)
        x = torch.cat((x, conv_2_1), dim=1)
        x = self.decoder_conv[3](x)
        x = self.efficientViT3Dblock_decoder_2(x)
        x = self.decoder_conv[4](x)
        x = torch.cat((x, conv_1_1), dim=1)

        # extend deconvs and skip connections
        # x = self.add_deconvs[0](x)
        # x = torch.cat((x, fe_1), dim=1)
        # x = self.add_deconvs[1](x)
        # x = torch.cat((x, fe_0), dim=1)

    logits = self.decoder_conv[5](x)

    # Reconstructing
    reconstructions = self.reconstruct_branch(x)

    # Calculating losses
    loss, cls_loss, rec_loss = self.losses(images, labels, norm, logits, reconstructions)

    self.log("margin_loss", cls_loss[0], on_step=False, on_epoch=True, sync_dist=True)
    self.log(f"{self.cls_loss}_loss", cls_loss[1], on_step=False, on_epoch=True, sync_dist=True)
    self.log("reconstruction_loss", rec_loss, on_step=False, on_epoch=True, sync_dist=True)

    return loss

Error:
Traceback (most recent call last):
File “train.py”, line 175, in <module>
trainer.fit(net, datamodule=data_module)
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\pytorch_lightning\trainer\trainer.py”, line 771, in fit
self._call_and_handle_interrupt(
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\pytorch_lightning\trainer\trainer.py”, line 724, in _call_and_handle_interrupt
return trainer_fn(*args, **kwargs)
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\pytorch_lightning\trainer\trainer.py”, line 812, in _fit_impl
results = self._run(model, ckpt_path=self.ckpt_path)
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\pytorch_lightning\trainer\trainer.py”, line 1222, in _run
self._call_callback_hooks(“on_fit_start”)
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\pytorch_lightning\trainer\trainer.py”, line 1637, in _call_callback_hooks
fn(self, self.lightning_module, *args, **kwargs)
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\pytorch_lightning\callbacks\model_summary.py”, line 56, in on_fit_start
model_summary = summarize(pl_module, max_depth=self._max_depth)
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\pytorch_lightning\utilities\model_summary.py”, line 427, in summarize
return ModelSummary(lightning_module, max_depth=max_depth)
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\pytorch_lightning\utilities\model_summary.py”, line 187, in __init__
self._layer_summary = self.summarize()
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\pytorch_lightning\utilities\model_summary.py”, line 244, in summarize
self._forward_example_input()
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\pytorch_lightning\utilities\model_summary.py”, line 274, in _forward_example_input
model(input_)
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\torch\nn\modules\module.py”, line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\torch\nn\modules\module.py”, line 1541, in _call_impl
return forward_call(*args, **kwargs)
File “D:\3_3D-EffiViTCaps-main\module\effiViTcaps.py”, line 245, in forward
x = self.decoder_conv[0](conv_cap_4_1)
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\torch\nn\modules\module.py”, line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\torch\nn\modules\module.py”, line 1541, in _call_impl
return forward_call(*args, **kwargs)
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\torch\nn\modules\container.py”, line 217, in forward
input = module(input)
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\torch\nn\modules\module.py”, line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\torch\nn\modules\module.py”, line 1541, in _call_impl
return forward_call(*args, **kwargs)
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\torch\nn\modules\conv.py”, line 1104, in forward
return F.conv_transpose3d(
TypeError: conv_transpose3d(): argument ‘input’ (position 1) must be Tensor, not tuple

Thank you!

You are passing a tuple to the first conv layer while a tensor is expected.
Check type(input) before passing it to the module. If it is already a tensor at that point, check the type of x inside the forward method again.
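A minimal sketch of that check, reusing the names from the forward method posted above (whether the extra tuple elements can simply be discarded is an assumption; they may be auxiliary outputs of the EfficientViT block):

# Debugging sketch: make sure only a tensor reaches the decoder.
conv_cap_4_1 = self.efficientViT3Dblock_bottleneck(conv_cap_4_1)
print(type(conv_cap_4_1))               # should be torch.Tensor, not tuple
if isinstance(conv_cap_4_1, tuple):
    conv_cap_4_1 = conv_cap_4_1[0]      # keep only the tensor output
print(conv_cap_4_1.shape)               # channel count must match decoder_conv[0]
x = self.decoder_conv[0](conv_cap_4_1)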

Hi, Sir. I’m self-learning GNNs and trying to implement heterogeneous link-level prediction. Right now I am encountering the same type of error: mat1 and mat2 shapes cannot be multiplied (15x242 and 64x64). I tried solutions like flattening and some of your previous suggestions, but I’m not sure they apply because of the way I processed my data (bag of words via CountVectorizer, then hstack). I’d be very grateful if you could take a look at my attempt:

Process Funds

from sklearn.feature_extraction.text import CountVectorizer
from scipy.sparse import hstack  # sparse hstack, since CountVectorizer returns scipy sparse matrices
import pandas as pd

boW_2 = {"main_category": fund_node_features["main_category"], "main_characteristics": fund_node_features["main_characteristics"]}
df = pd.DataFrame(boW_2)

vectorizer_1 = CountVectorizer()
vectorizer_2 = CountVectorizer()

# Count frequency of funds' main category
boW_matrix_category = vectorizer_1.fit_transform(df["main_category"])

# Eliminate NaN values
df['main_characteristics'].fillna('', inplace=True)

boW_matrix_characteristics = vectorizer_2.fit_transform(df["main_characteristics"])

combined_boW_numpy = hstack([boW_matrix_category, boW_matrix_characteristics]).toarray()

print(combined_boW_numpy.shape)

Process Investors

# Sort to define the order of nodes
sorted_df = investors.sort_values(by="User ID").set_index("User ID")

sorted_df = sorted_df.reset_index(drop=False)
investor_id_mapping = sorted_df["User ID"]

# remove spaces
sorted_df.columns = sorted_df.columns.str.strip()

# Select node features: industry
investor_node_features = sorted_df[["Industry"]]

pd.set_option('mode.chained_assignment', None)

# Expand industries
industries = investor_node_features["Industry"].str.split(",", expand=True)
investor_node_features["main_industries"] = industries[0]

investor_node_features = investor_node_features.drop("Industry", axis=1)

boW_1 = {"main_industries": investor_node_features["main_industries"]}
df = pd.DataFrame(boW_1)

vectorizer_investor = CountVectorizer()
investor_matrix = vectorizer_investor.fit_transform(df["main_industries"])

**Setup HeteroData:**

data = HeteroData()

investor_matrix_dense = investor_matrix.toarray()


# Load node features
data["investor"].x = torch.tensor(investor_matrix_dense, dtype=torch.long)
data["fund"].x = torch.tensor(combined_boW_numpy, dtype=torch.long)

# Load Edge_index
data['fund'].x = combined_boW_numpy
data['investor', 'interactions', 'fund'].edge_index = torch.tensor(edge_index, dtype=torch.long)


data = T.ToUndirected()(data)

Model

import torch
from torch import Tensor
import torch.nn.functional as F
from torch_geometric.data import HeteroData
from torch_geometric.nn import SAGEConv, to_hetero

# Define GNN Class

class GNN(torch.nn.Module):
  def __init__(self, hidden_channels):
    super().__init__()

    self.conv1 = SAGEConv(hidden_channels, hidden_channels)
    self.conv2 = SAGEConv(hidden_channels, hidden_channels)

  def forward(self, x: Tensor, edge_index: Tensor):
    # Use relu activation function
    x = self.conv1(x, edge_index)
    x = F.relu(x)
    x = self.conv2(x, edge_index)
    return x


# Define Classifier

class Classifier(torch.nn.Module):
  def forward(self, x_investor: Tensor, x_fund: Tensor, edge_label_index: Tensor):
        # Convert node embeddings to edge-level representations:
        edge_feat_investor = x_investor[edge_label_index[0]]
        edge_feat_fund = x_fund[edge_label_index[1]]

        # Apply dot-product to get a prediction per supervision edge:
        return (edge_feat_investor * edge_feat_fund).sum(dim=-1)

# Define Our Model

class Model(torch.nn.Module):
  def __init__(self, hidden_channels):
    super().__init__()

    # Instantiate homogeneous GNN
    self.gnn = GNN(hidden_channels)

    # Convert to heterogeneous
    self.gnn = to_hetero(self.gnn, data.metadata())


    self.classifier = Classifier()

  def forward(self, data: HeteroData):
    x_dict = {
        "investor": data["investor"].x,
        "fund": data["fund"].x,
    }

    # Use edge index for specific edge type
    edge_index_dict = {
            ("investor", "interactions", "fund"): data["investor", "interactions", "fund"].edge_index,
    }

    # Forward pass through GNN
    x_dict = self.gnn(x_dict, edge_index_dict)

    return self.classifier(x_dict["investor"], x_dict["fund"], data["investor", "interactions", "fund"].edge_label_index)


model = Model(hidden_channels = 64)

print(model)

**And this is where the problem appears.**
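For reference, a minimal sketch (an illustration only, not a tested fix) of how this kind of mismatch is usually avoided in PyTorch Geometric: the error mat1 and mat2 shapes cannot be multiplied (15x242 and 64x64) suggests the first SAGEConv was built for hidden_channels=64 input features while the bag-of-words vectors have 242 columns. SAGEConv accepts lazy input sizes via in_channels=(-1, -1), and the node features should be float tensors (note that data["fund"].x is later overwritten with the raw NumPy array in the snippet above):

import torch
from torch import Tensor
from torch_geometric.nn import SAGEConv

class GNN(torch.nn.Module):
    def __init__(self, hidden_channels):
        super().__init__()
        # (-1, -1) lets PyG infer the input feature size of each node type
        # on the first forward pass instead of hard-coding hidden_channels.
        self.conv1 = SAGEConv((-1, -1), hidden_channels)
        self.conv2 = SAGEConv((-1, -1), hidden_channels)

    def forward(self, x: Tensor, edge_index: Tensor) -> Tensor:
        x = self.conv1(x, edge_index).relu()
        return self.conv2(x, edge_index)

# Node features as float tensors, not long tensors or NumPy arrays:
data["investor"].x = torch.tensor(investor_matrix_dense, dtype=torch.float)
data["fund"].x = torch.tensor(combined_boW_numpy, dtype=torch.float)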

Hello again, Sir,

Thank you very much. I have fixed the error with your modifications, but I have encountered another error as follows:
##########################################################
Error:
Traceback (most recent call last):
File “train.py”, line 175, in <module>
trainer.fit(net, datamodule=data_module)
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\pytorch_lightning\trainer\trainer.py”, line 771, in fit
self._call_and_handle_interrupt(
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\pytorch_lightning\trainer\trainer.py”, line 724, in _call_and_handle_interrupt
return trainer_fn(*args, **kwargs)
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\pytorch_lightning\trainer\trainer.py”, line 812, in _fit_impl
results = self._run(model, ckpt_path=self.ckpt_path)
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\pytorch_lightning\trainer\trainer.py”, line 1222, in _run
self._call_callback_hooks(“on_fit_start”)
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\pytorch_lightning\trainer\trainer.py”, line 1637, in _call_callback_hooks
fn(self, self.lightning_module, *args, **kwargs)
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\pytorch_lightning\callbacks\model_summary.py”, line 56, in on_fit_start
model_summary = summarize(pl_module, max_depth=self._max_depth)
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\pytorch_lightning\utilities\model_summary.py”, line 427, in summarize
return ModelSummary(lightning_module, max_depth=max_depth)
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\pytorch_lightning\utilities\model_summary.py”, line 187, in __init__
self._layer_summary = self.summarize()
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\pytorch_lightning\utilities\model_summary.py”, line 244, in summarize
self._forward_example_input()
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\pytorch_lightning\utilities\model_summary.py”, line 274, in _forward_example_input
model(input_)
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\torch\nn\modules\module.py”, line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\torch\nn\modules\module.py”, line 1541, in _call_impl
return forward_call(*args, **kwargs)
File “D:\3_3D-EffiViTCaps-main\module\effiViTcaps.py”, line 249, in forward
x = self.decoder_conv[0](conv_cap_4_1)
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\torch\nn\modules\module.py”, line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\torch\nn\modules\module.py”, line 1541, in _call_impl
return forward_call(*args, **kwargs)
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\torch\nn\modules\container.py”, line 217, in forward
input = module(input)
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\torch\nn\modules\module.py”, line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\torch\nn\modules\module.py”, line 1541, in _call_impl
return forward_call(*args, **kwargs)
File “C:\Users\LGD\anaconda3\envs\EffiViTCaps\lib\site-packages\torch\nn\modules\conv.py”, line 1104, in forward
return F.conv_transpose3d(
RuntimeError: Given transposed=1, weight of size [192, 256, 2, 2, 2], expected input[1, 64, 4, 4, 4] to have 192 channels, but got 64 channels instead

##########################################################

Forward and Training:

def forward(self, x):
    # Contracting
    x = self.feature_extractor(x)
    # fe_0 = self.feature_extractor.conv1(x)
    # fe_1 = self.feature_extractor.conv2(fe_0)
    # x = self.feature_extractor.conv3(fe_1)

    conv_1_1 = x

    # conv_2_1 = self.encoder_convs[0](conv_1_1)
    conv_2_1 = self.patchMergingblock_1(conv_1_1)
    conv_2_1 = self.relu(conv_2_1)
    conv_2_1 = self.efficientViT3Dblock_encoder_2(conv_2_1)

    conv_3_1 = self.patchMergingblock_2(conv_2_1)
    # conv_3_1 = self.encoder_convs[1](conv_2_1)
    conv_3_1 = self.relu(conv_3_1)
    conv_3_1 = self.efficientViT3Dblock_encoder_3(conv_3_1)
    # Ensure conv_3_1 is a tensor before calling view
    if isinstance(conv_3_1, tuple):
        conv_3_1 = conv_3_1[0]

    print(f"Shape of conv_3_1 before reshaping: {conv_3_1.shape}")
    num_elements = conv_3_1.numel()
    print(f"Total number of elements in conv_3_1: {num_elements}")

    # Calculate the appropriate shape for reshaping
    # Maintain the correct total number of elements
    # For [1, 64, 8, 8, 8], it should be [1, 8, 8, 8, 8, 8] or any valid shape with the same total number of elements

    new_shape = (1, 8, 8, 8, 8, 8)

    print(f"New shape for reshaping: {new_shape}")

    conv_3_1_reshaped = conv_3_1.view(*new_shape)
    print(f"Shape of conv_3_1 after reshaping: {conv_3_1_reshaped.shape}")

    x = self.encoder_conv_caps[0](conv_3_1_reshaped.contiguous())
    # conv_cap_4_1 = x
    conv_cap_4_1 = self.encoder_conv_caps[1](x)

    shape = conv_cap_4_1.size()
    conv_cap_4_1 = conv_cap_4_1.view(shape[0], -1, shape[-3], shape[-2], shape[-1])
    conv_cap_4_1 = self.efficientViT3Dblock_bottleneck(conv_cap_4_1)

    # Expanding
    if self.connection == "skip":
        # ###########################################################################################
        # x = self.patchExpandingblock_3(conv_cap_4_1)
        # x = torch.cat((x, conv_3_1), dim=1)
        # x = self.efficientViT3Dblock_decoder_3(x)
        # x = self.relu(x)
        # x = self.patchExpandingblock_2(x)
        # x = torch.cat((x, conv_2_1), dim=1)
        # x = self.efficientViT3Dblock_decoder_2(x)
        # x = self.relu(x)
        # x = self.patchExpandingblock_1(x)
        # x = torch.cat((x, conv_1_1), dim=1)
        # ###########################################################################################
        # Assuming conv_cap_4_1 is a tuple, extract the first element if needed.
        if isinstance(conv_cap_4_1, tuple):
            conv_cap_4_1 = conv_cap_4_1[0]
        # Proceed with the decoder
        x = self.decoder_conv[0](conv_cap_4_1)
        x = torch.cat((x, conv_3_1), dim=1)
        x = self.decoder_conv[1](x)
        #x = self.efficientViT3Dblock_decoder_3(x)
        x = self.decoder_conv[2](x)
        x = torch.cat((x, conv_2_1), dim=1)
        x = self.decoder_conv[3](x)
        x = self.efficientViT3Dblock_decoder_2(x)
        x = self.decoder_conv[4](x)
        x = torch.cat((x, conv_1_1), dim=1)

        # extend deconvs and skip connections
        # x = self.add_deconvs[0](x)
        # x = torch.cat((x, fe_1), dim=1)
        # x = self.add_deconvs[1](x)
        # x = torch.cat((x, fe_0), dim=1)

    logits = self.decoder_conv[5](x)

    return logits

def training_step(self, batch, batch_idx):
    images, labels = batch["image"], batch["label"]
    # Contracting
    x = self.feature_extractor(images)
    # fe_0 = self.feature_extractor.conv1(x)
    # fe_1 = self.feature_extractor.conv2(fe_0)
    # x = self.feature_extractor.conv3(fe_1)

    conv_1_1 = x

    # conv_2_1 = self.encoder_convs[0](conv_1_1)
    conv_2_1 = self.patchMergingblock_1(conv_1_1)
    conv_2_1 = self.relu(conv_2_1)
    conv_2_1 = self.efficientViT3Dblock_encoder_2(conv_2_1)

    conv_3_1 = self.patchMergingblock_2(conv_2_1)
    # conv_3_1 = self.encoder_convs[1](conv_2_1)
    conv_3_1 = self.relu(conv_3_1)
    conv_3_1 = self.efficientViT3Dblock_encoder_3(conv_3_1)
    # Ensure conv_3_1 is a tensor before calling view
    if isinstance(conv_3_1, tuple):
        conv_3_1 = conv_3_1[0]

    print(f"Shape of conv_3_1 before reshaping: {conv_3_1.shape}")
    num_elements = conv_3_1.numel()
    print(f"Total number of elements in conv_3_1: {num_elements}")

    # Calculate the appropriate shape for reshaping
    # Maintain the correct total number of elements
    # For [1, 64, 8, 8, 8], it should be [1, 8, 8, 8, 8, 8] or any valid shape with the same total number of elements

    new_shape = (1, 8, 8, 8, 8, 8)

    print(f"New shape for reshaping: {new_shape}")

    conv_3_1_reshaped = conv_3_1.view(*new_shape)
    print(f"Shape of conv_3_1 after reshaping: {conv_3_1_reshaped.shape}")

    conv_3_1_reshaped = conv_3_1.view(-1, 8, 16, conv_3_1.shape[-1], conv_3_1.shape[-1], conv_3_1.shape[-1])

    x = self.encoder_conv_caps[0](conv_3_1_reshaped.contiguous())
    # conv_cap_4_1 = x
    conv_cap_4_1 = self.encoder_conv_caps[1](x)

    shape = conv_cap_4_1.size()
    conv_cap_4_1 = conv_cap_4_1.view(shape[0], -1, shape[-3], shape[-2], shape[-1])
    conv_cap_4_1 = self.efficientViT3Dblock_bottleneck(conv_cap_4_1)

    # Downsampled predictions
    norm = torch.linalg.norm(conv_cap_4_1, dim=2)

    # Expanding
    if self.connection == "skip":
        # ###########################################################################################
        # x = self.patchExpandingblock_3(conv_cap_4_1)
        # x = torch.cat((x, conv_3_1), dim=1)
        # x = self.efficientViT3Dblock_decoder_3(x)
        # x = self.relu(x)
        # x = self.patchExpandingblock_2(x)
        # x = torch.cat((x, conv_2_1), dim=1)
        # x = self.efficientViT3Dblock_decoder_2(x)
        # x = self.relu(x)
        # x = self.patchExpandingblock_1(x)
        # x = torch.cat((x, conv_1_1), dim=1)
        # ###########################################################################################
        # Assuming conv_cap_4_1 is a tuple, extract the first element if needed.
        if isinstance(conv_cap_4_1, tuple):
            conv_cap_4_1 = conv_cap_4_1[0]
        # Proceed with the decoder
        x = self.decoder_conv[0](conv_cap_4_1)
        x = torch.cat((x, conv_3_1), dim=1)
        x = self.decoder_conv[1](x)
        #x = self.efficientViT3Dblock_decoder_3(x)
        x = self.decoder_conv[2](x)
        x = torch.cat((x, conv_2_1), dim=1)
        x = self.decoder_conv[3](x)
        x = self.efficientViT3Dblock_decoder_2(x)
        x = self.decoder_conv[4](x)
        x = torch.cat((x, conv_1_1), dim=1)

        # extend deconvs and skip connections
        # x = self.add_deconvs[0](x)
        # x = torch.cat((x, fe_1), dim=1)
        # x = self.add_deconvs[1](x)
        # x = torch.cat((x, fe_0), dim=1)

    logits = self.decoder_conv[5](x)

    # Reconstructing
    reconstructions = self.reconstruct_branch(x)

    # Calculating losses
    loss, cls_loss, rec_loss = self.losses(images, labels, norm, logits, reconstructions)

    self.log("margin_loss", cls_loss[0], on_step=False, on_epoch=True, sync_dist=True)
    self.log(f"{self.cls_loss}_loss", cls_loss[1], on_step=False, on_epoch=True, sync_dist=True)
    self.log("reconstruction_loss", rec_loss, on_step=False, on_epoch=True, sync_dist=True)

    return loss

##########################################################
Decoder block:
def _build_decoder(self):
    # self.add_deconvs = nn.ModuleList()
    # self.add_deconvs.append(
    #     nn.ConvTranspose3d(128, 32, 1, 1)
    # )
    # self.add_deconvs.append(
    #     nn.ConvTranspose3d(64, 16, 1, 1)
    # )
    self.decoder_conv = nn.ModuleList()
    if self.connection == "skip":
        self.decoder_in_channels = [self.out_channels * 64, 384, 128, 256, 64, 128]
        self.decoder_out_channels = [256, 128, 128, 64, 64, self.out_channels]

    for i in range(6):
        if i == 5:
            self.decoder_conv.append(
                Conv["conv", 3](self.decoder_in_channels[i], self.decoder_out_channels[i], kernel_size=1)
            )

        elif i % 2 == 0:
            self.decoder_conv.append(
                UpSample(
                    dimensions=3,
                    in_channels=self.decoder_in_channels[i],
                    out_channels=self.decoder_out_channels[i],
                    scale_factor=2,
                )
            )
        else:
            self.decoder_conv.append(
                Convolution(
                    dimensions=3,
                    kernel_size=3,
                    in_channels=self.decoder_in_channels[i],
                    out_channels=self.decoder_out_channels[i],
                    strides=1,
                    padding=1,
                    bias=False,
                )
            )

Thanks a lot!!!

The shape mismatch is now raised in a conv layer, which expects a different number of channels in its input activation. Your code is unfortunately not properly formatted and not executable, so I cannot reproduce the issue, but you can print the activation shapes inside the forward method to narrow down which part of the code fails.
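A minimal sketch of that kind of shape logging inside the decoder branch; the expected channel counts quoted in the comments are assumptions taken from decoder_in_channels = [self.out_channels * 64, 384, 128, 256, 64, 128] and from the error message:

# Print the activation shape right before each decoder stage to see where the
# channel count diverges from the configured decoder_in_channels.
if isinstance(conv_cap_4_1, tuple):
    conv_cap_4_1 = conv_cap_4_1[0]
print("bottleneck output:", conv_cap_4_1.shape)  # decoder_conv[0] expects 192 channels, the error reports 64
x = self.decoder_conv[0](conv_cap_4_1)
x = torch.cat((x, conv_3_1), dim=1)
print("after first skip:", x.shape)              # decoder_conv[1] was built for 384 input channels
x = self.decoder_conv[1](x)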

Hello, sir. I’m sorry, but I have tried many times and the error is still the same. Do you have another suggestion for fixing the issue? Thank you.
I am sure, though, that the decoder method below is executed before the error occurs.


def _build_decoder(self):
    # self.add_deconvs = nn.ModuleList()
    # self.add_deconvs.append(
    #     nn.ConvTranspose3d(128, 32, 1, 1)
    # )
    # self.add_deconvs.append(
    #     nn.ConvTranspose3d(64, 16, 1, 1)
    # )
    self.decoder_conv = nn.ModuleList()
    if self.connection == "skip":
        self.decoder_in_channels = [self.out_channels * 64, 384, 128, 256, 64, 128]
        self.decoder_out_channels = [256, 128, 128, 64, 64, self.out_channels]

    for i in range(6):
        if i == 5:
            self.decoder_conv.append(
                Conv["conv", 3](self.decoder_in_channels[i], self.decoder_out_channels[i], kernel_size=1)
            )

        elif i % 2 == 0:
            self.decoder_conv.append(
                UpSample(
                    dimensions=3,
                    in_channels=self.decoder_in_channels[i],
                    out_channels=self.decoder_out_channels[i],
                    scale_factor=2,
                )
            )
        else:
            self.decoder_conv.append(
                Convolution(
                    dimensions=3,
                    kernel_size=3,
                    in_channels=self.decoder_in_channels[i],
                    out_channels=self.decoder_out_channels[i],
                    strides=1,
                    padding=1,
                    bias=False,
                )
            )

Thanks sir!