Found dtype Double but expected Float

Vincent_Br · April 17, 2022, 7:57pm

#!/usr/bin/env python

coding: utf-8

from pandas import read_csv
import numpy as np
import matplotlib.pyplot as plt
#from outils import GalaxyDataset
import os

import cv2
from tqdm import tqdm
from time import sleep

import random

import torch
from torch import nn
from torch.utils.data import DataLoader
from torch.utils.data import random_split
from torchvision import datasets
from torchvision.transforms import ToTensor
from torch.utils.data import Dataset

import sys
sys.path.append("/Téléchargements/outils")
from outils import CLASSES
from outils import DESCRIPTIONS
print(CLASSES)

In[5]:

class GalaxyDataset(Dataset):
def init(
self,
labels,
img_dir,
inds=None,
ext=".png",
transform=None,
target_transform=None,
):
“”"
Ensemble de données pour les images de galaxies

    :param labels: Fichier avec les images d'entré
    :type labels: str
    :param img_dir: Dossier avec les images
    :type img_dir: str
    :param inds: indices à utiliser pour cet ensemble, defaults to None
    :type inds: List[int], optional
    :param ext: Extension des fichiers images dans img_dir, avec le point, defaults to ".png"
    :type ext: str, optional
    :param transform: Transformation PyTorch pour les images, peuvent être combinnées avec Compose au besoin, defaults to None
    :type transform: Transformation PyTorch pour les classes (probabilités), optional
    :param target_transform: Transofmation , defaults to None
    :type target_transform: utils, optional
    """
    
    self.img_labels = read_csv(labels, index_col=0)  # read file skipping the first column
    self.img_labels.index = self.img_labels.index.astype(str) # index GalaxyID as str

    if inds is not None:
        # reset the index, and GalaxYID is added as a column
        #and select the rows of the train_dataset
        self.img_labels = self.img_labels.reset_index().loc[inds]             
    
    self.img_dir = img_dir
    self.ext = ext
    self.transform = transform
    self.target_transform = target_transform

def __len__(self):
    # Donne la longueur des données
    return len(self.img_labels)

def __getitem__(self, idx):
    # Retourne un tuple avec l'image, les probabilités et l'ID de l'image
    img_id = self.img_labels.iloc[idx,:][0] # select the GalaxyID
    img_path = os.path.join(self.img_dir, img_id + self.ext) # indcate the path to read the image
    img = cv2.imread(img_path) # read the image
    label = torch.tensor(self.img_labels.iloc[idx,:][1:].tolist()) # transform the probab to tansor

    if self.transform:
        img = self.transform(img)
    if self.target_transform:
        label = self.target_transform(label)

    return img, label, img_id

df=read_csv("/home/vincent/Documents/doc.csv",index_col=0)

data=np.genfromtxt("/home/vincent/Documents/doc.csv",delimiter=’,’,skip_header=1)

ID de la galaxie

GalaxyID = data[:,0]

df_index_str=[]

for i in range (len(df.index)):
df_index_str.append(str(df.index[i]))

print(df)

“”""
inds_str=[]
for i in range(len(inds)):
inds_str.append(str(inds[i]))

max_prob=[]
index_max=[]
for i in range(len(CLASSES)):
a=CLASSES[i]
max_prob.append(max(df[a]))
index_max.append(np.argmax(df[a]))
index_galaxy=[]
for i in range(len(CLASSES)):
index_galaxy.append(df.index[index_max[i]])

def img_is_color(img):

if len(img.shape) == 3:
    # Check the color channels to see if they're all the same.
    c1, c2, c3 = img[:, : , 0], img[:, :, 1], img[:, :, 2]
    if (c1 == c2).all() and (c2 == c3).all():
        return True

return False

def show_image_list(list_images, list_titles=None, list_cmaps=None, grid=True, num_cols=2, figsize=(20, 10), title_fontsize=30):
‘’’
Shows a grid of images, where each image is a Numpy array. The images can be either
RGB or grayscale.

Parameters:
----------
images: list
    List of the images to be displayed.
list_titles: list or None
    Optional list of titles to be shown for each image.
list_cmaps: list or None
    Optional list of cmap values for each image. If None, then cmap will be
    automatically inferred.
grid: boolean
    If True, show a grid over each image
num_cols: int
    Number of columns to show.
figsize: tuple of width, height
    Value to be passed to pyplot.figure()
title_fontsize: int
    Value to be passed to set_title().
'''

assert isinstance(list_images, list)
assert len(list_images) > 0
assert isinstance(list_images[0], np.ndarray)

if list_titles is not None:
    assert isinstance(list_titles, list)
    assert len(list_images) == len(list_titles), '%d imgs != %d titles' % (len(list_images), len(list_titles))

if list_cmaps is not None:
    assert isinstance(list_cmaps, list)
    assert len(list_images) == len(list_cmaps), '%d imgs != %d cmaps' % (len(list_images), len(list_cmaps))

num_images  = len(list_images)
num_cols    = min(num_images, num_cols)
num_rows    = int(num_images / num_cols) + (1 if num_images % num_cols != 0 else 0)

# Create a grid of subplots.
fig, axes = plt.subplots(num_rows, num_cols, figsize=figsize)

# Create list of axes for easy iteration.
if isinstance(axes, np.ndarray):
    list_axes = list(axes.flat)
else:
    list_axes = [axes]

for i in range(num_images):

    img    = list_images[i]
    title  = list_titles[i] if list_titles is not None else 'Image %d' % (i)
    cmap   = list_cmaps[i] if list_cmaps is not None else (None if img_is_color(img) else 'gray')
    
    list_axes[i].imshow(img, cmap=cmap)
    list_axes[i].set_title(title, fontsize=title_fontsize) 
    list_axes[i].grid(grid)

for i in range(num_images, len(list_axes)):
    list_axes[i].set_visible(False)

fig.tight_layout()
_ = plt.show()

In[21]:

list_images=[]
list_title=DESCRIPTIONS
print(list_title)
for i in range(len(CLASSES)):
index_instr=str(index_galaxy[i])
path=r"/home/vincent/Documents/images_training_rev1/"+index_instr+".jpg" #a voir
img = cv2.imread(path)
list_images.append(img)

show_image_list(list_images,list_title, num_cols=4,title_fontsize=10,figsize=(20, 20))

####### fin Etape1

Etape 2

ATTENTION NE PAS EXECUTUER CETTE CELLULE

In[ ]:

“”"
#reduire la dim des images et les sauvgarder dans un niuveau dossier.

######for i in tqdm(range(len(df.index))): #C’est quoi tqdm ?
for i in range(len(df.index)):

#specifier le Chemin ou lire l’image

path=r"/home/vincent/Documents/images_training_rev1/"+str(df.index[i])+".jpg"

#read the image 424x424

img = cv2.imread(path, 1)

Extracting the Region of Interest (ROI): (212x212) à partir du centre

half = img[106:318, 106:318]

#resize the ROI 64x64 pixels
img_64 = cv2.resize(half, (64, 64))

#saving the output of cv2.resize in new file as png image.
cv2.imwrite(r"/home/vincent/Documents/Image64/"+str(df.index[i])+".png",img_64)

sleep(0.02)      #obligé sleep ???

“”"

In[22]:

dataset_size = len(GalaxyID) # taille de data
test_size = int(0.2* dataset_size) # On veut que 20 % des images servent aux tests.
train_size = dataset_size - test_size # taille de datat disponible pour l’entrainement

On split aleatoirement les data en 2 deux sous-ensembles

train_dataset, test_dataset = random_split(GalaxyID,
[train_size, test_size])

indices des deux ensembles dans GalaxyID

train_inds = list(np.where(train_dataset[:][i]==GalaxyID)[0][0] for i in
tqdm(range(len(train_dataset[:]))))

test_inds = list(np.where(test_dataset[:][i]==GalaxyID)[0][0] for i in
tqdm(range(len(test_dataset[:]))))

In[215]:

call GalaxyDataset to return the image, probab and GalaxyID

train_model = GalaxyDataset("/home/vincent/Documents/doc.csv",r"/home/vincent/Documents/Image64/",train_inds,transform=ToTensor())

test_model = GalaxyDataset("/home/vincent/Documents/doc.csv",r"/home/vincent/Documents/Image64/",test_inds,transform=ToTensor())

“”" DataLoader gives us batches of 64 images from trainsets,
randomizing their order (shuffle=True) “”"
train_loader = DataLoader(train_model, batch_size = 64, shuffle=True)
test_loader = DataLoader(test_model, batch_size = 64, shuffle=True)

In[37]:

list_images=[]
list_title=DESCRIPTIONS
print(list_title)
for i in range(len(CLASSES)):
index_instr=str(index_galaxy[i])
path=r"/home/vincent/Documents/Image64/"+index_instr+".png" #a voir
img = cv2.imread(path)
list_images.append(img)

show_image_list(list_images,list_title, num_cols=4,title_fontsize=10,figsize=(20, 20))

####### fin Etape2

ETAPE 3

from torch import nn
import utils as ut

def get_output_width(input_width, kernel_size, stride=1, padding=0, dilation=1):
# Fonction pour calculer la largeur de sortie d’une couche de convolution
# Ref: Conv2d — PyTorch 1.11.0 documentation
# Attention: stride=kernel_size pour max pooling par défaut

return int(
    (input_width + 2 * padding - dilation * (kernel_size - 1) - 1) / stride + 1
)

device = “cuda” if torch.cuda.is_available() else “cpu”

class ConvNet(nn.Module):

def __init__(self):
    
    super(ConvNet, self).__init__()
    # NOTE: On pourrait avoir une seule séquence. J'ai séparé pour la clarté

    # Pooling 2x2, assigné à pool car on le réutilise plusieurs fois
    self.pool = nn.MaxPool2d(2)

    # Séquence de convolutions
    self.conv_stack = nn.Sequential(
        #nn.Conv2d(1, 6, 5,padding=3),
        nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5,padding=3),
        # batch normalization normalise les poids dans une couche
        nn.BatchNorm2d(6),
        nn.ReLU(),
        self.pool,
        nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5,padding=3),
        nn.BatchNorm2d(16),
        nn.ReLU(),
        self.pool,
    )

    # Opérations pour convertir les images 2d en 1d
    self.flatten = nn.Flatten(start_dim=1)
    # stride=kernel_size pour maxpool par défaut
    self.linear_entry_dim =2
    """
    self.linear_entry_dim = get_output_width(get_output_width(
        get_output_width(get_output_width(64, 5,padding=3), 5, stride=2,padding=3), 5
    ), 5, stride=2,padding=3)
    """
    
    # Séquence de couches pleinement connectées
    self.linear_stack = nn.Sequential(
        # Dropout de 20% des unités dans la prochaine couche
        # nn.Dropout(p=0.2),
        #nn.Linear(16 * self.linear_entry_dim**2, 120),
        nn.Linear(4624 , 120),
        nn.ReLU(),
        nn.Linear(120, 84),
        nn.ReLU(),
        nn.Linear(84, 37),
    )

def forward(self, x):
    print(x.shape)
    # La séquence de convolution d'abbord
    x = self.conv_stack(x)
    print(x.shape)
    # On applatit l'image
    x = self.flatten(x)
    # La séquence pleinement connectée
    logits = self.linear_stack(x)
    return logits

#Utilisez une fonction sigmoide pour transforméé les sorties linéaires (logits) en valeurs entre 0 et 1. ?

model = ConvNet().to(device)

labels_map = {
0: “Smooth”,
1: “Featured or disc”,
2: “Star or artifact”,
3: “Edge on”,
4: “Not edge on”,
5: “Bar through center”,
6: “No bar”,
7: “Spiral”,
8: “No Spiral”,
9: “No bulge”,
10: “No bulge”,
11: “Just noticeable bulge”,
12: “Obvious bulge”,
13: “Dominant bulge”,
14: “Odd Feature”,
15: “No Odd Feature”,
16: “Completely round”,
17: “In between”,
18: “Cigar shaped”,
19: “Ring”,
20: “Lens or arc”,
21: “Disturbed”,
22: “Irregular”,
23: “Other”,
24: “Merger”,
25: “Dust lane”,
26: “Rounded bulge”,
27: “Boxy bulge”,
28: “No bulge”,
29: “Tightly wound arms”,
30: “Medium wound arms”,
31: “Loose wound arms”,
32: “1 Spiral Arm”,
33: “2 Spiral Arms”,
34: “3 Spiral Arms”,
35: “4 Spiral Arms”,
36: “More than four Spiral Arms”,
37: “Can’t tell how many spiral arms”,

}

inv_labels_map = dict(zip(labels_map.values(), labels_map.keys()))

learning_rate = 1e-3
batch_size = 64
epochs = 6

loss_fn = nn.MSELoss()

#Adam est un optimiseur plus flexible que SGD
optimizer = torch.optim.Adam(model.parameters())

def train_loop(dataloader, model, loss_fn, optimizer):

# Taille de l'ensemble d'entraînement
size = len(dataloader.dataset)

# Itération sur les batches (dataloader nous donne les données par batch)
# X est l'image et y la classe
train_loss = 0.0
for batch,(X,y,z) in enumerate(dataloader):
    # Fonction objectif
    pred = model(X)  # prédiction
    loss = loss_fn(pred, y)

    # Rétropropagation
    optimizer.zero_grad()  # On réinitialise le gradient d'abord
    loss.backward()  # Rétropropagation
    optimizer.step()  # On fait un pas dans l'espace paramètre

    loss, current = loss.item(), batch * len(X)
    train_loss += loss * X.size(0)
    # Progrès
    if batch % 100 == 0:
        print(f"Loss: {loss}, [{current}/{size}]")

return train_loss / size

def test_loop(dataloader, model, loss_fn):
size = len(dataloader.dataset)
num_batches = len(dataloader)
test_loss = correct = 0

# On se sauve les gradients comme ils ne sont pas utilisés
with torch.no_grad():
    for X, y in dataloader:
        pred = model(X)
        test_loss += loss_fn(pred, y).item()  # Compute loss
        correct += (pred.argmax(1) == y).type(torch.float).sum().item()

test_loss /= num_batches
correct /= size
print(f"Test Error: \n Accuracy: {(100*correct)}%, Avg loss: {test_loss} \n")

return test_loss

#HERE MY PROBLEM

epochs = 6
train_losses = []
test_losses = []
for t in range(epochs):
print(f"Epoch {t+1}\n-------------------------------")
train_loss = train_loop(train_loader, model, loss_fn, optimizer)
train_losses.append(train_loss)
test_loss = test_loop(test_loader, model, loss_fn)
test_losses.append(test_loss)
print(“Done!”)

Hi, im trying to create a network with my pytorch but Im stuck on this error for more than 6h now
“Found dtype Double but expected Float” that occur when I call my training loop. Can someone help me pls ?

ptrblck · April 18, 2022, 6:38am

Your code is hard to read as you didn’t format it. The error message is raised e.g. if your input data and the model parameters show a dtype mismatch.
I would guess that your data might be a DoubleTensor, so call x = x.float() on the input tensor before passing it to the model.