#!/usr/bin/env python
coding: utf-8
from pandas import read_csv
import numpy as np
import matplotlib.pyplot as plt
#from outils import GalaxyDataset
import os
import cv2
from tqdm import tqdm
from time import sleep
import random
import torch
from torch import nn
from torch.utils.data import DataLoader
from torch.utils.data import random_split
from torchvision import datasets
from torchvision.transforms import ToTensor
from torch.utils.data import Dataset
import sys
sys.path.append("/Téléchargements/outils")
from outils import CLASSES
from outils import DESCRIPTIONS
print(CLASSES)
In[5]:
class GalaxyDataset(Dataset):
def init(
self,
labels,
img_dir,
inds=None,
ext=".png",
transform=None,
target_transform=None,
):
“”"
Ensemble de données pour les images de galaxies
:param labels: Fichier avec les images d'entré
:type labels: str
:param img_dir: Dossier avec les images
:type img_dir: str
:param inds: indices à utiliser pour cet ensemble, defaults to None
:type inds: List[int], optional
:param ext: Extension des fichiers images dans img_dir, avec le point, defaults to ".png"
:type ext: str, optional
:param transform: Transformation PyTorch pour les images, peuvent être combinnées avec Compose au besoin, defaults to None
:type transform: Transformation PyTorch pour les classes (probabilités), optional
:param target_transform: Transofmation , defaults to None
:type target_transform: utils, optional
"""
self.img_labels = read_csv(labels, index_col=0) # read file skipping the first column
self.img_labels.index = self.img_labels.index.astype(str) # index GalaxyID as str
if inds is not None:
# reset the index, and GalaxYID is added as a column
#and select the rows of the train_dataset
self.img_labels = self.img_labels.reset_index().loc[inds]
self.img_dir = img_dir
self.ext = ext
self.transform = transform
self.target_transform = target_transform
def __len__(self):
# Donne la longueur des données
return len(self.img_labels)
def __getitem__(self, idx):
# Retourne un tuple avec l'image, les probabilités et l'ID de l'image
img_id = self.img_labels.iloc[idx,:][0] # select the GalaxyID
img_path = os.path.join(self.img_dir, img_id + self.ext) # indcate the path to read the image
img = cv2.imread(img_path) # read the image
label = torch.tensor(self.img_labels.iloc[idx,:][1:].tolist()) # transform the probab to tansor
if self.transform:
img = self.transform(img)
if self.target_transform:
label = self.target_transform(label)
return img, label, img_id
df=read_csv("/home/vincent/Documents/doc.csv",index_col=0)
data=np.genfromtxt("/home/vincent/Documents/doc.csv",delimiter=’,’,skip_header=1)
ID de la galaxie
GalaxyID = data[:,0]
df_index_str=[]
for i in range (len(df.index)):
df_index_str.append(str(df.index[i]))
print(df)
“”""
inds_str=[]
for i in range(len(inds)):
inds_str.append(str(inds[i]))
max_prob=[]
index_max=[]
for i in range(len(CLASSES)):
a=CLASSES[i]
max_prob.append(max(df[a]))
index_max.append(np.argmax(df[a]))
index_galaxy=[]
for i in range(len(CLASSES)):
index_galaxy.append(df.index[index_max[i]])
def img_is_color(img):
if len(img.shape) == 3:
# Check the color channels to see if they're all the same.
c1, c2, c3 = img[:, : , 0], img[:, :, 1], img[:, :, 2]
if (c1 == c2).all() and (c2 == c3).all():
return True
return False
def show_image_list(list_images, list_titles=None, list_cmaps=None, grid=True, num_cols=2, figsize=(20, 10), title_fontsize=30):
‘’’
Shows a grid of images, where each image is a Numpy array. The images can be either
RGB or grayscale.
Parameters:
----------
images: list
List of the images to be displayed.
list_titles: list or None
Optional list of titles to be shown for each image.
list_cmaps: list or None
Optional list of cmap values for each image. If None, then cmap will be
automatically inferred.
grid: boolean
If True, show a grid over each image
num_cols: int
Number of columns to show.
figsize: tuple of width, height
Value to be passed to pyplot.figure()
title_fontsize: int
Value to be passed to set_title().
'''
assert isinstance(list_images, list)
assert len(list_images) > 0
assert isinstance(list_images[0], np.ndarray)
if list_titles is not None:
assert isinstance(list_titles, list)
assert len(list_images) == len(list_titles), '%d imgs != %d titles' % (len(list_images), len(list_titles))
if list_cmaps is not None:
assert isinstance(list_cmaps, list)
assert len(list_images) == len(list_cmaps), '%d imgs != %d cmaps' % (len(list_images), len(list_cmaps))
num_images = len(list_images)
num_cols = min(num_images, num_cols)
num_rows = int(num_images / num_cols) + (1 if num_images % num_cols != 0 else 0)
# Create a grid of subplots.
fig, axes = plt.subplots(num_rows, num_cols, figsize=figsize)
# Create list of axes for easy iteration.
if isinstance(axes, np.ndarray):
list_axes = list(axes.flat)
else:
list_axes = [axes]
for i in range(num_images):
img = list_images[i]
title = list_titles[i] if list_titles is not None else 'Image %d' % (i)
cmap = list_cmaps[i] if list_cmaps is not None else (None if img_is_color(img) else 'gray')
list_axes[i].imshow(img, cmap=cmap)
list_axes[i].set_title(title, fontsize=title_fontsize)
list_axes[i].grid(grid)
for i in range(num_images, len(list_axes)):
list_axes[i].set_visible(False)
fig.tight_layout()
_ = plt.show()
In[21]:
list_images=[]
list_title=DESCRIPTIONS
print(list_title)
for i in range(len(CLASSES)):
index_instr=str(index_galaxy[i])
path=r"/home/vincent/Documents/images_training_rev1/"+index_instr+".jpg" #a voir
img = cv2.imread(path)
list_images.append(img)
show_image_list(list_images,list_title, num_cols=4,title_fontsize=10,figsize=(20, 20))
####### fin Etape1
Etape 2
ATTENTION NE PAS EXECUTUER CETTE CELLULE
In[ ]:
“”"
#reduire la dim des images et les sauvgarder dans un niuveau dossier.
######for i in tqdm(range(len(df.index))): #C’est quoi tqdm ?
for i in range(len(df.index)):
#specifier le Chemin ou lire l’image
path=r"/home/vincent/Documents/images_training_rev1/"+str(df.index[i])+".jpg"
#read the image 424x424
img = cv2.imread(path, 1)
Extracting the Region of Interest (ROI): (212x212) à partir du centre
half = img[106:318, 106:318]
#resize the ROI 64x64 pixels
img_64 = cv2.resize(half, (64, 64))
#saving the output of cv2.resize in new file as png image.
cv2.imwrite(r"/home/vincent/Documents/Image64/"+str(df.index[i])+".png",img_64)
sleep(0.02) #obligé sleep ???
“”"
In[22]:
dataset_size = len(GalaxyID) # taille de data
test_size = int(0.2* dataset_size) # On veut que 20 % des images servent aux tests.
train_size = dataset_size - test_size # taille de datat disponible pour l’entrainement
On split aleatoirement les data en 2 deux sous-ensembles
train_dataset, test_dataset = random_split(GalaxyID,
[train_size, test_size])
indices des deux ensembles dans GalaxyID
train_inds = list(np.where(train_dataset[:][i]==GalaxyID)[0][0] for i in
tqdm(range(len(train_dataset[:]))))
test_inds = list(np.where(test_dataset[:][i]==GalaxyID)[0][0] for i in
tqdm(range(len(test_dataset[:]))))
In[215]:
call GalaxyDataset to return the image, probab and GalaxyID
train_model = GalaxyDataset("/home/vincent/Documents/doc.csv",r"/home/vincent/Documents/Image64/",train_inds,transform=ToTensor())
test_model = GalaxyDataset("/home/vincent/Documents/doc.csv",r"/home/vincent/Documents/Image64/",test_inds,transform=ToTensor())
“”" DataLoader gives us batches of 64 images from trainsets,
randomizing their order (shuffle=True) “”"
train_loader = DataLoader(train_model, batch_size = 64, shuffle=True)
test_loader = DataLoader(test_model, batch_size = 64, shuffle=True)
In[37]:
list_images=[]
list_title=DESCRIPTIONS
print(list_title)
for i in range(len(CLASSES)):
index_instr=str(index_galaxy[i])
path=r"/home/vincent/Documents/Image64/"+index_instr+".png" #a voir
img = cv2.imread(path)
list_images.append(img)
show_image_list(list_images,list_title, num_cols=4,title_fontsize=10,figsize=(20, 20))
####### fin Etape2
ETAPE 3
from torch import nn
import utils as ut
def get_output_width(input_width, kernel_size, stride=1, padding=0, dilation=1):
# Fonction pour calculer la largeur de sortie d’une couche de convolution
# Ref: Conv2d — PyTorch 1.11.0 documentation
# Attention: stride=kernel_size pour max pooling par défaut
return int(
(input_width + 2 * padding - dilation * (kernel_size - 1) - 1) / stride + 1
)
device = “cuda” if torch.cuda.is_available() else “cpu”
class ConvNet(nn.Module):
def __init__(self):
super(ConvNet, self).__init__()
# NOTE: On pourrait avoir une seule séquence. J'ai séparé pour la clarté
# Pooling 2x2, assigné à pool car on le réutilise plusieurs fois
self.pool = nn.MaxPool2d(2)
# Séquence de convolutions
self.conv_stack = nn.Sequential(
#nn.Conv2d(1, 6, 5,padding=3),
nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5,padding=3),
# batch normalization normalise les poids dans une couche
nn.BatchNorm2d(6),
nn.ReLU(),
self.pool,
nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5,padding=3),
nn.BatchNorm2d(16),
nn.ReLU(),
self.pool,
)
# Opérations pour convertir les images 2d en 1d
self.flatten = nn.Flatten(start_dim=1)
# stride=kernel_size pour maxpool par défaut
self.linear_entry_dim =2
"""
self.linear_entry_dim = get_output_width(get_output_width(
get_output_width(get_output_width(64, 5,padding=3), 5, stride=2,padding=3), 5
), 5, stride=2,padding=3)
"""
# Séquence de couches pleinement connectées
self.linear_stack = nn.Sequential(
# Dropout de 20% des unités dans la prochaine couche
# nn.Dropout(p=0.2),
#nn.Linear(16 * self.linear_entry_dim**2, 120),
nn.Linear(4624 , 120),
nn.ReLU(),
nn.Linear(120, 84),
nn.ReLU(),
nn.Linear(84, 37),
)
def forward(self, x):
print(x.shape)
# La séquence de convolution d'abbord
x = self.conv_stack(x)
print(x.shape)
# On applatit l'image
x = self.flatten(x)
# La séquence pleinement connectée
logits = self.linear_stack(x)
return logits
#Utilisez une fonction sigmoide pour transforméé les sorties linéaires (logits) en valeurs entre 0 et 1. ?
model = ConvNet().to(device)
labels_map = {
0: “Smooth”,
1: “Featured or disc”,
2: “Star or artifact”,
3: “Edge on”,
4: “Not edge on”,
5: “Bar through center”,
6: “No bar”,
7: “Spiral”,
8: “No Spiral”,
9: “No bulge”,
10: “No bulge”,
11: “Just noticeable bulge”,
12: “Obvious bulge”,
13: “Dominant bulge”,
14: “Odd Feature”,
15: “No Odd Feature”,
16: “Completely round”,
17: “In between”,
18: “Cigar shaped”,
19: “Ring”,
20: “Lens or arc”,
21: “Disturbed”,
22: “Irregular”,
23: “Other”,
24: “Merger”,
25: “Dust lane”,
26: “Rounded bulge”,
27: “Boxy bulge”,
28: “No bulge”,
29: “Tightly wound arms”,
30: “Medium wound arms”,
31: “Loose wound arms”,
32: “1 Spiral Arm”,
33: “2 Spiral Arms”,
34: “3 Spiral Arms”,
35: “4 Spiral Arms”,
36: “More than four Spiral Arms”,
37: “Can’t tell how many spiral arms”,
}
inv_labels_map = dict(zip(labels_map.values(), labels_map.keys()))
learning_rate = 1e-3
batch_size = 64
epochs = 6
loss_fn = nn.MSELoss()
#Adam est un optimiseur plus flexible que SGD
optimizer = torch.optim.Adam(model.parameters())
def train_loop(dataloader, model, loss_fn, optimizer):
# Taille de l'ensemble d'entraînement
size = len(dataloader.dataset)
# Itération sur les batches (dataloader nous donne les données par batch)
# X est l'image et y la classe
train_loss = 0.0
for batch,(X,y,z) in enumerate(dataloader):
# Fonction objectif
pred = model(X) # prédiction
loss = loss_fn(pred, y)
# Rétropropagation
optimizer.zero_grad() # On réinitialise le gradient d'abord
loss.backward() # Rétropropagation
optimizer.step() # On fait un pas dans l'espace paramètre
loss, current = loss.item(), batch * len(X)
train_loss += loss * X.size(0)
# Progrès
if batch % 100 == 0:
print(f"Loss: {loss}, [{current}/{size}]")
return train_loss / size
def test_loop(dataloader, model, loss_fn):
size = len(dataloader.dataset)
num_batches = len(dataloader)
test_loss = correct = 0
# On se sauve les gradients comme ils ne sont pas utilisés
with torch.no_grad():
for X, y in dataloader:
pred = model(X)
test_loss += loss_fn(pred, y).item() # Compute loss
correct += (pred.argmax(1) == y).type(torch.float).sum().item()
test_loss /= num_batches
correct /= size
print(f"Test Error: \n Accuracy: {(100*correct)}%, Avg loss: {test_loss} \n")
return test_loss
#HERE MY PROBLEM
epochs = 6
train_losses = []
test_losses = []
for t in range(epochs):
print(f"Epoch {t+1}\n-------------------------------")
train_loss = train_loop(train_loader, model, loss_fn, optimizer)
train_losses.append(train_loss)
test_loss = test_loop(test_loader, model, loss_fn)
test_losses.append(test_loss)
print(“Done!”)
Hi, im trying to create a network with my pytorch but Im stuck on this error for more than 6h now
“Found dtype Double but expected Float” that occur when I call my training loop. Can someone help me pls ?