Hello,
I am trying to run a simple model with GPU acceleration, and I am currently running into two different issues:
- Whatever CUDA/PyTorch combination I use, the first instruction executed on the GPU always takes around 15 minutes, regardless of which instruction it is.
- My model always raises an error on the GPU, although it runs fine on the CPU.
For 1), I am trying to install PyTorch from source to see if anything works differently (there is also a minimal timing check below).
For 2), I am completely stuck.
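As a sanity check for 1), this is the kind of minimal check that should isolate the delay from my model entirely. It is only a rough sketch: it just times CUDA context creation plus the first kernel launch on a random tensor, nothing model-specific:

import time
import torch

print(torch.__version__, torch.version.cuda, torch.cuda.is_available())

start = time.time()
x = torch.randn(1024, 1024, device="cuda")  # first CUDA call: context creation + first kernel launch
y = x @ x
torch.cuda.synchronize()                    # wait until the kernel has actually finished
print(f"First GPU op took {time.time() - start:.1f} s")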
The model I am using is below:
import matplotlib.pyplot as plt
import torch
from torchvision import datasets, transforms
import helper
import numpy as np

data_dir = '../Cat_Dog_data'

# TODO: Define transforms for the training data and testing data
train_transforms = transforms.Compose([transforms.RandomRotation(30),
                                       transforms.RandomResizedCrop(224),
                                       transforms.RandomHorizontalFlip(),
                                       transforms.Grayscale(),
                                       transforms.ToTensor()])

test_transforms = transforms.Compose([transforms.Resize(255),
                                      transforms.CenterCrop(224),
                                      transforms.Grayscale(),
                                      transforms.ToTensor()])

# Pass transforms in here, then run the next cell to see how the transforms look
train_data = datasets.ImageFolder(data_dir + '/train', transform=train_transforms)
test_data = datasets.ImageFolder(data_dir + '/test', transform=test_transforms)

trainloader = torch.utils.data.DataLoader(train_data, batch_size=32)
testloader = torch.utils.data.DataLoader(test_data, batch_size=32)

len(test_data)

from torch import nn, optim
import torch.nn.functional as F

if torch.cuda.is_available():
    dev = "cuda:0"
else:
    dev = "cpu"
print("device is: " + dev)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
#device = torch.device(dev)

class Classifier(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc0 = nn.Linear(50176, 784)
        self.fc1 = nn.Linear(784, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 64)
        self.fc4 = nn.Linear(64, 2)

    def forward(self, x):
        # make sure input tensor is flattened
        print('The shape of X is: ')
        print(x.shape)
        x = x.view(x.shape[0], -1)
        print('The shape of X flattened is: ')
        print(x.shape)
        x = F.relu(self.fc0(x))
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = F.log_softmax(self.fc4(x), dim=1)
        print('The shape of X is: ')
        print(x.shape)
        return x

model = Classifier()
model = model.to(device)
criterion = nn.NLLLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0003)

print("Our model: \n\n", model, '\n')
print("The state dict keys: \n\n", model.state_dict().keys())

epochs = 2
steps = 0
train_losses, test_losses, test_acc = [], [], []

# Training pass
for e in range(epochs):
    running_loss = 0
    for images, labels in trainloader:
        images = images.to(device)
        print("Device for images is: ", images.get_device())
        labels = labels.to(device)
        optimizer.zero_grad()

        log_ps = model(images)
        loss = criterion(log_ps, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    # Validation pass
    else:
        with torch.no_grad():
            test_loss = 0
            running_accuracy = 0
            for images, labels in testloader:
                images = images.to(device)
                labels = labels.to(device)
                optimizer.zero_grad()
                print("Asta vrem, asta vreem: ", images.shape)
                log_psV = model(images)
                print(log_psV.shape, labels.shape)
                test_loss += criterion(log_psV, labels)
                psV = torch.exp(log_psV)
                top_p, top_class = psV.topk(1, dim=1)
                equals = top_class == labels.view(*top_class.shape)
                accuracy = torch.mean(equals.type(torch.FloatTensor))
                running_accuracy += accuracy.item()

        train_losses.append(running_loss/len(trainloader))
        test_losses.append(test_loss/len(testloader))
        test_acc.append(running_accuracy/len(testloader))

        print(f'Epoch: {e+1} epochs')
        print(f'Training Loss: {running_loss/len(trainloader)}')
        print(f'Test Loss: {test_loss/len(testloader)}')
        print(f'Test Accuracy: {(running_accuracy/len(testloader))*100}%')

plt.plot(range(epochs), train_losses, label='Training Loss')
plt.plot(range(epochs), test_losses, label='Test Loss')
plt.plot(range(epochs), test_acc, label='Accuracy')
plt.legend()
The output and the error I am getting are the following:
Our model:
Classifier(
(fc0): Linear(in_features=50176, out_features=784, bias=True)
(fc1): Linear(in_features=784, out_features=256, bias=True)
(fc2): Linear(in_features=256, out_features=128, bias=True)
(fc3): Linear(in_features=128, out_features=64, bias=True)
(fc4): Linear(in_features=64, out_features=2, bias=True)
)
The state dict keys:
odict_keys(['fc0.weight', 'fc0.bias', 'fc1.weight', 'fc1.bias', 'fc2.weight', 'fc2.bias', 'fc3.weight', 'fc3.bias', 'fc4.weight', 'fc4.bias'])
Device for images is: 0
The shape of X is:
torch.Size([32, 1, 224, 224])
The shape of X flattened is:
torch.Size([32, 50176])
RuntimeError                              Traceback (most recent call last)
in
     63         optimizer.zero_grad()
     64
---> 65         log_ps = model(images)
     66         loss = criterion(log_ps, labels)
     67         loss.backward()

~\.conda\envs\pytorch18-cuda111\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
    887             result = self._slow_forward(*input, **kwargs)
    888         else:
--> 889             result = self.forward(*input, **kwargs)
    890         for hook in itertools.chain(
    891                 _global_forward_hooks.values(),

in forward(self, x)
     31         print(x.shape)
     32
---> 33         x = F.relu(self.fc0(x))
     34         x = F.relu(self.fc1(x))
     35         x = F.relu(self.fc2(x))

~\.conda\envs\pytorch18-cuda111\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
    887             result = self._slow_forward(*input, **kwargs)
    888         else:
--> 889             result = self.forward(*input, **kwargs)
    890         for hook in itertools.chain(
    891                 _global_forward_hooks.values(),

~\.conda\envs\pytorch18-cuda111\lib\site-packages\torch\nn\modules\linear.py in forward(self, input)
     92
     93     def forward(self, input: Tensor) -> Tensor:
---> 94         return F.linear(input, self.weight, self.bias)
     95
     96     def extra_repr(self) -> str:

~\.conda\envs\pytorch18-cuda111\lib\site-packages\torch\nn\functional.py in linear(input, weight, bias)
   1751     if has_torch_function_variadic(input, weight):
   1752         return handle_torch_function(linear, (input, weight), input, weight, bias=bias)
-> 1753     return torch._C._nn.linear(input, weight, bias)
1754
1755
RuntimeError: CUDA error: CUBLAS_STATUS_EXECUTION_FAILED when calling cublasSgemm( handle, opa, opb, m, n, k, &alpha, a, lda, b, ldb, &beta, c, ldc)
Does anyone have any clue why I am getting this behaviour?
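In the meantime, the next thing I plan to try for 2) is reproducing the failing call outside the model: a single nn.Linear with the same shapes as fc0, run with CUDA_LAUNCH_BLOCKING=1 so the error is reported at the exact failing call. This is only a rough sketch (the file name repro.py is just a placeholder):

# Run with:  CUDA_LAUNCH_BLOCKING=1 python repro.py
# (on Windows:  set CUDA_LAUNCH_BLOCKING=1  before running)
import torch
import torch.nn as nn

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

layer = nn.Linear(50176, 784).to(device)     # same shapes as fc0 in my classifier
x = torch.randn(32, 50176, device=device)    # same shape as my flattened batch
out = layer(x)                               # the call that fails inside the model
torch.cuda.synchronize()
print(out.shape, out.device)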
Thank you in advance,