Please help me: how can I speed up my algorithm on Windows 10 with 32 CPUs and 64 GB of RAM? Each iteration of 10 epochs takes 30 minutes. I have already done the following:
1. I wrapped the code in the `if` guard clause (`if __name__ == '__main__':`) for Windows 10.
2. I use num_workers = 2 with pin_memory = False (this combination worked better for me in comparison) and batch_size = 10. I also have a worker algorithm that uses a pool of 24 processors.
How can I vectorize my algorithm?
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms # add models to the list
import os
import seaborn as sn # for heatmaps
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import pandas as pd
import openpyxl
# ignore harmless warnings
import warnings
warnings.filterwarnings("ignore")
class ConvolutionalNetwork(nn.Module):
    """Two conv + max-pool stages followed by three fully connected layers.

    Args:
        in_channels: Number of channels of the input images.
        kernel: Side length of the square convolution kernels.
        bias: Whether the convolution layers learn a bias term.
        feature_size: Side length of the feature map after the second
            pooling step; used to size the first linear layer.
        padding: Zero padding applied by both convolutions.
        stride: Stride of both convolutions.
        num_classes: Number of output classes.
    """

    def __init__(self, in_channels, kernel, bias, feature_size,
                 padding=1, stride=1, num_classes=18):
        super().__init__()
        self.flat_features = feature_size * feature_size * 16
        self.conv1 = nn.Conv2d(in_channels, 6, kernel_size=(kernel, kernel),
                               stride=stride, padding=padding, bias=bias)
        self.conv2 = nn.Conv2d(6, 16, kernel_size=(kernel, kernel),
                               stride=stride, padding=padding, bias=bias)
        self.fc1 = nn.Linear(self.flat_features, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, num_classes)

    def forward(self, X):
        X = F.relu(self.conv1(X))
        X = F.max_pool2d(X, 2, 2)  # 2x2 max pooling
        X = F.relu(self.conv2(X))
        X = F.max_pool2d(X, 2, 2)
        X = X.view(-1, self.flat_features)
        X = F.relu(self.fc1(X))
        X = F.relu(self.fc2(X))
        X = self.fc3(X)
        return F.log_softmax(X, dim=1)


def _build_transforms(image, grayscale):
    """Return ``(train_transform, test_transform)`` for one run."""
    def common_tail():
        # Resize the shortest side to `image`, then crop the centre square.
        steps = [
            transforms.Resize(image),
            transforms.CenterCrop(image),
            transforms.ToTensor(),
        ]
        if grayscale:
            steps.append(transforms.Normalize([0.4161, ], [0.1688, ]))
        else:
            steps.append(transforms.Normalize([0.485, 0.456, 0.406],
                                              [0.229, 0.224, 0.225]))
        return steps

    if grayscale:
        prefix = [transforms.Grayscale(num_output_channels=1)]
        rotation = transforms.RandomRotation(10, fill=(0,))  # +/- 10 degrees
    else:
        prefix = []
        rotation = transforms.RandomRotation(10)  # +/- 10 degrees

    train_transform = transforms.Compose(
        prefix
        + [rotation, transforms.RandomHorizontalFlip()]  # flips 50% of images
        + common_tail()
    )
    test_transform = transforms.Compose(prefix + common_tail())
    return train_transform, test_transform


def _pooled_feature_size(image, convo, kernel, padding=1, stride=1):
    """Return the feature-map side after `convo` conv + 2x2 max-pool stages."""
    size = image
    for _ in range(convo):
        size = (size - kernel + 2 * padding) // stride + 1  # convolution
        size //= 2                                          # 2x2 max pooling
    return size


def _collect_predictions(model, loader, device):
    """Run `model` over every batch of `loader`; return (labels, preds) arrays."""
    pred_chunks = []
    label_chunks = []
    with torch.no_grad():
        for inputs, classes in loader:
            outputs = model(inputs.to(device))
            _, preds = torch.max(outputs, 1)
            pred_chunks.append(preds.view(-1).cpu())
            label_chunks.append(classes.view(-1).cpu())
    # Concatenate once instead of growing a tensor batch by batch.
    return torch.cat(label_chunks).numpy(), torch.cat(pred_chunks).numpy()


def run(device):
    """Train and evaluate one CNN per configuration row of the workbook.

    Configurations are read from sheet ``corrida`` of
    ``corridas/corridas.xlsx`` (rows 2..1982, columns 2-6: number of conv
    layers, kernel size, bias flag, image size, channel count). For every
    row a model is trained for 10 epochs and accuracy, weighted precision,
    recall, F1 and the elapsed seconds are written to the sheets ``acc``,
    ``prc``, ``recall``, ``f1`` and ``times``. The workbook is saved after
    every run.

    Args:
        device: Ignored. As in the previous implementation the device is
            chosen here: ``cuda:0`` when CUDA is available, otherwise CPU.

    Raises:
        ValueError: If a row requests an architecture other than two
            convolution layers with a 3x3 or 5x5 kernel.
    """
    import time
    from itertools import islice
    from sklearn.metrics import f1_score, precision_score, recall_score

    root = '../'
    workbook_path = 'corridas/corridas.xlsx'
    excel_file = openpyxl.load_workbook(workbook_path)
    config_sheet = excel_file['corrida']

    # Selected once; the incoming argument is intentionally overwritten to
    # keep the previous behaviour.
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Constants
    paddingg = 1
    stridee = 1
    batch_size = 10
    epochs = 10
    max_trn_batch = 800
    max_tst_batch = 300

    # Cursor (column, row) of the next free result cell.
    count = 20
    mm = 2

    for cor in range(2, 1983):
        print ("- - - - - - - - - - - - - - - - - - - - - - - - - - - - - ")
        print (" Modelo a correr")
        print ("- - - - - - - - - - - - - - - - - - - - - - - - - - - - - ")
        convo = int(config_sheet.cell(row=cor, column=2).value)
        kernel = int(config_sheet.cell(row=cor, column=3).value)
        biass = bool(int(config_sheet.cell(row=cor, column=4).value))
        image = int(config_sheet.cell(row=cor, column=5).value)
        tipo_image = int(config_sheet.cell(row=cor, column=6).value)

        grayscale = tipo_image == 1
        train_transform, test_transform = _build_transforms(image, grayscale)
        print("gray" if grayscale else "RGB")

        train_data = datasets.ImageFolder(os.path.join(root, 'train_real'),
                                          transform=train_transform)
        test_data = datasets.ImageFolder(os.path.join(root, 'validation'),
                                         transform=test_transform)
        torch.manual_seed(42)
        # NOTE(review): each new iterator over these loaders starts fresh
        # worker processes; DataLoader(persistent_workers=True) may cut that
        # overhead on Windows -- measure before adopting.
        train_loader = DataLoader(train_data, batch_size=batch_size,
                                  num_workers=2, pin_memory=False, shuffle=True)
        test_loader = DataLoader(test_data, batch_size=batch_size,
                                 num_workers=2, pin_memory=False, shuffle=True)

        # Labels / classes of the dataset
        class_names = train_data.classes
        print(class_names)
        print(f'Training images available: {len(train_data)}')
        print(f'Testing images available: {len(test_data)}')

        # Side length of the feature map fed to the first linear layer.
        f = _pooled_feature_size(image, convo, kernel, paddingg, stridee)
        print (f)

        if not (convo == 2 and kernel in (3, 5)):
            # Only this architecture is defined; fail with a clear message
            # instead of hitting an undefined or stale model class.
            raise ValueError(
                f'Unsupported configuration in row {cor}: '
                f'convo={convo}, kernel={kernel}'
            )

        torch.manual_seed(101)
        CNNmodel = ConvolutionalNetwork(tipo_image, kernel, biass, f,
                                        padding=paddingg, stride=stridee)
        # The batches are moved to `device`, so the model must live there too.
        CNNmodel = CNNmodel.to(device)
        CNNmodel = CNNmodel.share_memory()
        criterion = nn.CrossEntropyLoss()
        optimizer = torch.optim.Adam(CNNmodel.parameters(), lr=0.001)
        print(CNNmodel)

        start_time = time.time()
        train_losses = []
        test_losses = []
        train_correct = []
        test_correct = []
        tst_seen = 0

        for i in range(epochs):
            trn_corr = 0
            tst_corr = 0
            tst_seen = 0

            # Run the training batches (at most max_trn_batch of them)
            CNNmodel.train()
            for b, (X_train, y_train) in enumerate(
                    islice(train_loader, max_trn_batch), 1):
                X_train = X_train.to(device)
                y_train = y_train.to(device)

                # Apply the model
                y_pred = CNNmodel(X_train)
                loss = criterion(y_pred, y_train)

                # Tally the number of correct predictions
                predicted = torch.max(y_pred.data, 1)[1]
                trn_corr += (predicted == y_train).sum()

                # Update parameters
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                # Print interim results
                if b % 200 == 0:
                    print(
                        f'epoch: {i:2} batch: {b:4} '
                        f'[{batch_size*b:6}/{max_trn_batch*batch_size}] '
                        f'loss: {loss.item():10.8f} '
                        f'accuracy: {trn_corr.item()*100/(batch_size*b):7.3f}%'
                    )

            # Store plain numbers so no tensors are kept alive across epochs.
            train_losses.append(loss.item())
            train_correct.append(int(trn_corr))

            # Run the testing batches (at most max_tst_batch of them)
            CNNmodel.eval()
            with torch.no_grad():
                for X_test, y_test in islice(test_loader, max_tst_batch):
                    X_test = X_test.to(device)
                    y_test = y_test.to(device)

                    # Apply the model
                    y_val = CNNmodel(X_test)

                    # Tally the number of correct predictions
                    predicted = torch.max(y_val.data, 1)[1]
                    tst_corr += (predicted == y_test).sum()
                    tst_seen += y_test.size(0)

                # Loss of the last evaluated batch; predictions and labels
                # are guaranteed to belong to the same batch here.
                loss = criterion(y_val, y_test)
            test_losses.append(loss.item())
            test_correct.append(int(tst_corr))

        stop = int(time.time() - start_time)

        ############## confusion matrix ##################
        labels, preds = _collect_predictions(CNNmodel, test_loader, device)

        # Wrap to the next row *before* writing so no run is dropped.
        if count > 100:
            count = 2
            mm = mm + 1

        results = {
            # Divide by the images actually evaluated, not a fixed 3000.
            'acc': round(test_correct[-1] * 100 / tst_seen, 2),
            'prc': round(precision_score(labels, preds, average="weighted") * 100, 2),
            'recall': round(recall_score(labels, preds, average="weighted") * 100, 2),
            'f1': round(f1_score(labels, preds, average="weighted") * 100, 2),
            'times': stop,
        }
        for sheet_name, value in results.items():
            excel_file[sheet_name].cell(row=mm, column=count).value = value
        count = count + 1

        # Persist after every run so an interrupted sweep keeps its results.
        excel_file.save(workbook_path)