High CPU consumption

This is my entire code for a small classification problem. The problem is that when I run it, it consumes a lot of CPU, and I am not sure which part of the code is responsible for it.

Any ideas or suggestions would be really helpful.
Thanks in advance.

import os
import torch
import pandas as pd
from skimage import io, transform
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import torch.optim as optim
import torchvision
from torch.optim import lr_scheduler
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
from tensorboardX import SummaryWriter
import setproctitle
from tensorboard_logger import log_value, configure

RUN_PATH = '/mnt/da5df9e4-cdc6-4d55-91e8-b2383e89165f/Ryan/1234/models'
configure(RUN_PATH, flush_secs=1)
setproctitle.setproctitle('train.py')
writer = SummaryWriter('/mnt/machine/Ryan/1234/models')

net = Net().cuda()
print (net)
transform = transforms.Compose([
    transforms.Scale((224,224)),
    transforms.ToTensor()
])

class WhaleData(Dataset):
    def __init__(self, data_file, root_dir , transform = None):
        self.csv_file = pd.read_csv(data_file)
        self.root_dir = root_dir
        self.transform = transform
        
    def __len__(self):
        return len(os.listdir(self.root_dir))
    
    def __getitem__(self, index):
        image = os.path.join(self.root_dir, self.csv_file['Image'][index])
        image = Image.open(image).convert('RGB')
        label  = self.csv_file['Id'][index]
        sample = {'image': image, 'label':label}
        if self.transform:
            sample['image'] = self.transform(sample['image'])
        return sample

out_path = '/mnt/machine/Ryan/1234/models/'
trainset  = WhaleData(data_file = '/mnt/machine/Ryan/1234/train_encoded.csv', 
         root_dir = '/mnt/machine/Ryan/1234/train',transform = transform)
train_loader = torch.utils.data.DataLoader(trainset,batch_size = 128)
test_loader = torch.utils.data.DataLoader(trainset)

model_ft = torchvision.models.resnet18(pretrained=True)
num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Linear(num_ftrs, 4251)
model_ft = model_ft
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model_ft.parameters(), lr=0.001, momentum=0.9)
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

# criterion = nn.CrossEntropyLoss()
# optimizer = optim.SGD(net.parameters(), lr = 0.001, momentum = 0.9)
for epoch in range(100):
    running_loss = 0.0
    for index,info in enumerate(test_loader):
        inputs = info['image']
        inputs = Variable(inputs.float())#.cuda()
        labels= Variable(info['label'].long())#.cuda()
        # print (model_conv)
        outputs = model_ft(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss+=loss
        print ('Loss for this image  ', loss.data[0])
        writer.add_scalar('loss', loss.data[0])
        log_value('train_loss', loss, epoch)
    torch.save(model_ft.state_dict, out_path+'{}.pth'.format(epoch))

The first thing to try is to specify num_workers=1 or more in your loaders.
On a side note, if your model runs on the CPU, it is normal to have high CPU usage.
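Something like this, for example (just a sketch; num_workers, pin_memory, and shuffle are settings I am assuming here, tune them for your machine):

train_loader = torch.utils.data.DataLoader(
    trainset,
    batch_size=128,
    shuffle=True,        # reshuffle the training data every epoch
    num_workers=4,       # load and preprocess batches in background worker processes
    pin_memory=True)     # speeds up host-to-GPU copies when you train on the GPU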

Hi @Latope2-150, giving the argument num_workers=1 does not change the CPU usage. Any other ideas?

Are you sure your model runs on the GPU? From your code, the model runs on the CPU.
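In the posted code, only net is moved with .cuda(), but net is never used; model_ft and the batches stay on the CPU because the .cuda() calls in the loop are commented out. A minimal sketch of the fix, assuming a single GPU:

model_ft = model_ft.cuda()                            # move the model parameters to the GPU

for index, info in enumerate(train_loader):
    inputs = Variable(info['image'].float().cuda())   # move each batch to the GPU as well
    labels = Variable(info['label'].long().cuda())
    outputs = model_ft(inputs)                        # the forward pass now runs on the GPU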

Hmmmm, let me check this once again.

I don't know what to say. Thanks a lot, the consumption rate is much lower now. Thanks for your time!

I just have another question: my batch size is set to 128, and when I run the code it occupies about 750 MB of my GPU memory. When I increase it to 256, it still occupies only 750 MB. Do you see anything wrong here?

From your code, you are training on your test_loader, so you probably want to change
for index,info in enumerate(test_loader):
to
for index,info in enumerate(train_loader):
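For reference, here is a minimal sketch of the corrected loop. Besides iterating over train_loader, it adds optimizer.zero_grad() (missing in the posted code), accumulates loss.data[0] instead of the Variable, and calls state_dict() with parentheses so the weights are saved rather than the bound method:

# assumes model_ft has already been moved to the GPU with model_ft.cuda()
for epoch in range(100):
    running_loss = 0.0
    for index, info in enumerate(train_loader):
        inputs = Variable(info['image'].float().cuda())
        labels = Variable(info['label'].long().cuda())

        optimizer.zero_grad()                         # clear gradients from the previous step
        outputs = model_ft(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.data[0]                  # accumulate a number, not the autograd graph
        writer.add_scalar('loss', loss.data[0])
        log_value('train_loss', loss.data[0], epoch)
    torch.save(model_ft.state_dict(), out_path + '{}.pth'.format(epoch))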

Works well now. Thanks for all your inputs, really helpful!