multi-output CNN from train test Dictionary manually built

Aymen_Tlili · May 3, 2022, 2:19am

So my goal is to use pytorch CNN to get multi-output regression results (a pair:[Result 1 ,Result 2]).I did my own datawrangling and extracted the data on my own (all same size RGB pics, torch.Size([3, 480, 672]) and ended up with the following structure.
The pytorch dataloader is already embdded within the Dataset Class
Dict 1 :self.x maps ID ----->group of Images
Dict 2 :self.y maps ID ----->a singuler list containing both labels(hopefully [Result 1 ,Result 2])
Dict 3 :self.z maps ID ----->number of Dict 1’s images for that specific ID

I’m trying to split the data according to pytorch’s API and reach the multi-output CNN structure . most tutorials instruct me to make my own dataset class and define how ceratain methods will work:

here’s the dataset(the path to Juventus is hard coded but i will change it to have the team name as a parameter but I’m fixing the problems for 1 before generalizing) and model code:

class SoccerDataset():
def __init__(self,Verbose=False,transform = transforms.Compose([transforms.ToTensor()])):
    #attributes
    self.path_str=r'.\Juventus\Home'
           # self.x = D_images
           # self.y = D_labels
           # self.n_samples = pngCounter
    self.transform=transform

    # data loading
    Results = pd.read_csv("Results.csv")
    Results=Results[["wyId","gameweek","label","Home","Away","Home Score","Away Score"]]
    D_images=dict() #ID--->images
    D_labels=dict() #ID--->label
    D_NHeatmaps=dict() #ID---->n_heatmaps
    for i in range(len(os.listdir(self.path_str))):
        path=Path(self.path_str+"/"+os.listdir(self.path_str)[i])
        D_NHeatmaps[os.listdir(self.path_str)[i]]=sum(1 for x in path.glob('*') if x.is_file())-5
    L_match=list()
    path=Path('.\Juventus\Home')
    dataset = torchvision.datasets.ImageFolder(self.path_str,transform=self.transform)

    pngCounter = sum(1 for x in path.glob('**/*.png') if x.is_file()) 
    print(pngCounter)
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=50, shuffle=False)
    for tu in range(pngCounter):
        image= next(iter(dataloader))
        L_match.append(image)
        for index in range(len(Results[Results["Home"]=="Juventus"][["Home Score","Away Score"]].values)):
            for ID in D_NHeatmaps.keys():
                if ((str(Results[Results["Home"]=="Juventus"][["wyId"]].values[index][0])==ID) and (tu%D_NHeatmaps[ID]==0)and(tu>0)):
                    Result,id_match,TeamName=Results[Results["Home"]=="Juventus"][["Home Score","Away Score"]].values[index][0:2],Results[Results["Home"]=="Juventus"][["wyId"]].values[index][0],Results[Results["Home"]=="Juventus"][["Home"]].values[index][0]#Result,id_match,TeamName
                    D_labels[str(id_match)]=list(Result)
                    D_images[str(id_match)]=L_match 
                    L_match.clear()
    self.x = D_images
    self.y = D_labels
    self.z = D_NHeatmaps
    self.n_samples = pngCounter
def __getitem__(self, ID):
    path = self.path_str
    preds = self.x[str(ID)]
    trgts = self.y[str(ID)]
    sample = { 
        'predictors' : preds,
        'targets' : trgts,
        'path': path,
      }
    return sample


def __len__(self):
    # len(dataset)
    return self.n_samples

my current model :

class ConvNet(nn.Module):
def __init__(self, numChannels, classes):
    # call the parent constructor
    super(ConvNet, self).__init__()
    # initialize first set of CONV => RELU => POOL layers
    self.conv1 = nn.Conv3d(in_channels=3, out_channels=20,kernel_size=(5, 5))
    
    
    self.relu1 = ReLU()
    
    
    self.maxpool1 = nn.MaxPool3d(kernel_size=(2, 2), stride=(2, 2))
    
    
    # initialize second set of CONV => RELU => POOL layers
    self.conv2 = nn.Conv3d(in_channels=20, out_channels=50,kernel_size=(5, 5))
    
    self.relu2 = ReLU()
    
    self.maxpool2 = nn.MaxPool3d(kernel_size=(2, 2), stride=(2, 2))
    
    
    # initialize first (and only) set of FC => RELU layers
    self.fc1 = Linear(in_features=322560, out_features=500) #322560=480*672
    self.relu3 = ReLU()
    # initialize our softmax classifier
    self.fc2 = Linear(in_features=500, out_features=2)
    self.logSoftmax = LogSoftmax(dim=1)
    

def forward(self, x):
    # pass the input through our first set of CONV => RELU =>
    # POOL layers
    x = self.conv1(x)
    x = self.relu1(x)
    x = self.maxpool1(x)
    # pass the output from the previous layer through the second
    # set of CONV => RELU => POOL layers
    x = self.conv2(x)
    x = self.relu2(x)
    x = self.maxpool2(x)
    # flatten the output from the previous layer and pass it
    # through our only set of FC => RELU layers
    x = flatten(x, 1)
    x = self.fc1(x)
    x = self.relu3(x)
    # pass the output to our softmax classifier to get our output
    # predictions
    output= self.fc2(x)
    #output = self.logSoftmax(x)
    # return the output predictions
    return output

how do I conform to Pytorch’s API given the current structure to split into train,test ?