So my goal is to use pytorch CNN to get multi-output regression results (a pair:[Result 1 ,Result 2]).I did my own datawrangling and extracted the data on my own (all same size RGB pics, torch.Size([3, 480, 672]) and ended up with the following structure.
The pytorch dataloader is already embdded within the Dataset Class
Dict 1 :self.x maps ID ----->group of Images
Dict 2 :self.y maps ID ----->a singuler list containing both labels(hopefully [Result 1 ,Result 2])
Dict 3 :self.z maps ID ----->number of Dict 1’s images for that specific ID
I’m trying to split the data according to pytorch’s API and reach the multi-output CNN structure . most tutorials instruct me to make my own dataset class and define how ceratain methods will work:
here’s the dataset(the path to Juventus is hard coded but i will change it to have the team name as a parameter but I’m fixing the problems for 1 before generalizing) and model code:
class SoccerDataset():
def __init__(self,Verbose=False,transform = transforms.Compose([transforms.ToTensor()])):
#attributes
self.path_str=r'.\Juventus\Home'
# self.x = D_images
# self.y = D_labels
# self.n_samples = pngCounter
self.transform=transform
# data loading
Results = pd.read_csv("Results.csv")
Results=Results[["wyId","gameweek","label","Home","Away","Home Score","Away Score"]]
D_images=dict() #ID--->images
D_labels=dict() #ID--->label
D_NHeatmaps=dict() #ID---->n_heatmaps
for i in range(len(os.listdir(self.path_str))):
path=Path(self.path_str+"/"+os.listdir(self.path_str)[i])
D_NHeatmaps[os.listdir(self.path_str)[i]]=sum(1 for x in path.glob('*') if x.is_file())-5
L_match=list()
path=Path('.\Juventus\Home')
dataset = torchvision.datasets.ImageFolder(self.path_str,transform=self.transform)
pngCounter = sum(1 for x in path.glob('**/*.png') if x.is_file())
print(pngCounter)
dataloader = torch.utils.data.DataLoader(dataset, batch_size=50, shuffle=False)
for tu in range(pngCounter):
image= next(iter(dataloader))
L_match.append(image)
for index in range(len(Results[Results["Home"]=="Juventus"][["Home Score","Away Score"]].values)):
for ID in D_NHeatmaps.keys():
if ((str(Results[Results["Home"]=="Juventus"][["wyId"]].values[index][0])==ID) and (tu%D_NHeatmaps[ID]==0)and(tu>0)):
Result,id_match,TeamName=Results[Results["Home"]=="Juventus"][["Home Score","Away Score"]].values[index][0:2],Results[Results["Home"]=="Juventus"][["wyId"]].values[index][0],Results[Results["Home"]=="Juventus"][["Home"]].values[index][0]#Result,id_match,TeamName
D_labels[str(id_match)]=list(Result)
D_images[str(id_match)]=L_match
L_match.clear()
self.x = D_images
self.y = D_labels
self.z = D_NHeatmaps
self.n_samples = pngCounter
def __getitem__(self, ID):
path = self.path_str
preds = self.x[str(ID)]
trgts = self.y[str(ID)]
sample = {
'predictors' : preds,
'targets' : trgts,
'path': path,
}
return sample
def __len__(self):
# len(dataset)
return self.n_samples
my current model :
class ConvNet(nn.Module):
def __init__(self, numChannels, classes):
# call the parent constructor
super(ConvNet, self).__init__()
# initialize first set of CONV => RELU => POOL layers
self.conv1 = nn.Conv3d(in_channels=3, out_channels=20,kernel_size=(5, 5))
self.relu1 = ReLU()
self.maxpool1 = nn.MaxPool3d(kernel_size=(2, 2), stride=(2, 2))
# initialize second set of CONV => RELU => POOL layers
self.conv2 = nn.Conv3d(in_channels=20, out_channels=50,kernel_size=(5, 5))
self.relu2 = ReLU()
self.maxpool2 = nn.MaxPool3d(kernel_size=(2, 2), stride=(2, 2))
# initialize first (and only) set of FC => RELU layers
self.fc1 = Linear(in_features=322560, out_features=500) #322560=480*672
self.relu3 = ReLU()
# initialize our softmax classifier
self.fc2 = Linear(in_features=500, out_features=2)
self.logSoftmax = LogSoftmax(dim=1)
def forward(self, x):
# pass the input through our first set of CONV => RELU =>
# POOL layers
x = self.conv1(x)
x = self.relu1(x)
x = self.maxpool1(x)
# pass the output from the previous layer through the second
# set of CONV => RELU => POOL layers
x = self.conv2(x)
x = self.relu2(x)
x = self.maxpool2(x)
# flatten the output from the previous layer and pass it
# through our only set of FC => RELU layers
x = flatten(x, 1)
x = self.fc1(x)
x = self.relu3(x)
# pass the output to our softmax classifier to get our output
# predictions
output= self.fc2(x)
#output = self.logSoftmax(x)
# return the output predictions
return output
how do I conform to Pytorch’s API given the current structure to split into train,test ?