@peterjc123 I still have the problem: training my model is very slow. I have just tried updating the CUDA toolkit to 10.1.
class PermDataset(Dataset):
    """Dataset of 3D sub-volumes cut from rock images, labelled from a CSV file.

    Each CSV row describes one sub-volume. The sub-volume name (column 1)
    encodes which rock image it is cut from, a numeric id, and a direction
    character; the label is read from column 4.
    """

    # (substring searched for in the 4-character rock tag, attribute holding
    # the matching image). Entries are tried in order and the first hit wins.
    _ROCK_IMAGES = (
        ('Br', 'Brea'),
        ('Ben', 'Benth'),
        ('BnD', 'Bn'),
        ('ND3', 'Nav'),
        ('Leo', 'Leo'),
        ('GHD', 'GH'),
        ('CL20', 'CL_20'),
        ('CL15', 'CL_15'),
        ('CL10', 'CL_10'),
    )

    def __init__(self, csv_file, img_dir, Brea, Benth, GH, Leo, Nav, Bn, CL_20, CL_15, CL_10):
        """
        Args:
            csv_file (string): Path to the csv file with labels.
            img_dir (string): Directory with all the images (stored only).
            Brea, Benth, GH, Leo, Nav, Bn, CL_20, CL_15, CL_10: the rock
                images, one 3D numpy array each.
        """
        # Let pandas open (and close) the file itself: the previous
        # open(csv_file, 'rU') never closed the handle, and the 'U' mode flag
        # no longer exists in Python 3.11+.
        self.file_name = pd.read_csv(csv_file, encoding='utf-8', engine='c')
        self.img_dir = img_dir
        self.Brea = Brea    # 3D numpy array
        self.Benth = Benth  # 3D numpy array
        self.GH = GH        # 3D numpy array
        self.Leo = Leo      # 3D numpy array
        self.Nav = Nav      # 3D numpy array
        self.Bn = Bn        # 3D numpy array
        self.CL_20 = CL_20  # 3D numpy array
        self.CL_15 = CL_15  # 3D numpy array
        self.CL_10 = CL_10  # 3D numpy array

    def __len__(self):
        """Return the number of rows in the label CSV."""
        return len(self.file_name)

    def __getitem__(self, idx):
        """Return ``(cube, label, name)`` for the sub-volume at row ``idx``.

        1- read the sub-volume name and its label from the CSV row
        2- parse the rock tag, the sub-volume id and the direction from the name
        3- pick the matching rock image and retrieve the sub-volume
        4- add a leading axis to the sub-volume

        Raises:
            ValueError: if the name cannot be parsed or matches no rock image.
        """
        idx_name = self.file_name.iloc[idx, 1]
        idx_label = self.file_name.iloc[idx, 4]
        # The direction is the second-to-last character of the name.
        idx_direction = idx_name[-2]

        # Rock tag: first uppercase letter plus the three characters after it.
        tag_match = re.search(r'[A-Z]...', idx_name)
        # Sub-volume id: the first run of digits found in the name.
        id_match = re.search(r'[0-9]+', idx_name)
        if tag_match is None or id_match is None:
            raise ValueError('cannot parse subvolume name {!r}'.format(idx_name))
        name_string = tag_match.group(0)
        s1 = int(id_match.group(0))

        # assign image
        for key, attr in self._ROCK_IMAGES:
            if key in name_string:
                img = getattr(self, attr)
                break
        else:
            # Previously this only printed a message and then crashed on an
            # unbound `img`; fail with an error that names the offending row.
            raise ValueError('no matching image for {!r}'.format(idx_name))

        # function for getting a subvolume from the 3d matrix
        # NOTE(review): 64 and 32 look like the cube size and stride -- confirm
        # against the definition of shift().
        cube = shift(img, 64, 32, s1, idx_direction)
        cube = np.expand_dims(cube, 0)
        return cube, idx_label, idx_name
My main code structure:
def trainProcess(load_model=False):
    """Train the ResNet-50 regressor on the permeability dataset.

    Loads the nine rock images from ``img_path``, builds a ``PermDataset``
    from ``labels_file``, splits it into train / validation / test subsets,
    and runs up to 100 epochs with L1 loss, Adam and early stopping.

    Relies on module-level names that are not defined in this function:
    ``img_path``, ``labels_file``, ``viz``, ``resnet50``, ``EarlyStopping``,
    ``trainEpoch`` and ``valEpoch``.

    Args:
        load_model: Currently unused; kept so existing callers keep working.
    """
    # writer = SummaryWriter()
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # model = CNN3D().float()
    model = resnet50().float()
    model.to(device)
    criterion = torch.nn.L1Loss()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)
    # Lower is better for a loss, so start from +inf and track the minimum.
    # (Starting at 0 and taking max() reported the worst loss as "best".)
    # NOTE(review): assumes valEpoch returns a loss value -- confirm.
    best_val = float('inf')
    early_stopping = EarlyStopping(patience=15, verbose=True)

    Brea_image = np.load(os.path.join(img_path, 'BrD2.npy'))
    Benth_image = np.load(os.path.join(img_path, 'BenD2.npy'))
    GH_image = np.load(os.path.join(img_path, 'GHD3.npy'))
    Leo_image = np.load(os.path.join(img_path, 'LeoD3.npy'))
    ND_image = np.load(os.path.join(img_path, 'ND3.npy'))
    Bn_image = np.load(os.path.join(img_path, 'BnD3.npy'))
    CL_20 = np.load(os.path.join(img_path, 'CL_20.npy'))
    CL_15 = np.load(os.path.join(img_path, 'CL_15.npy'))
    CL_10 = np.load(os.path.join(img_path, 'CL_10.npy'))

    # Dataset Split
    dataset = PermDataset(labels_file, img_path, Brea_image, Benth_image, GH_image, Leo_image, ND_image, Bn_image,
                          CL_20, CL_15, CL_10)
    batch_size = 3
    validation_split = .25
    testing_split = 0.5
    shuffle_dataset = True
    random_seed = 4
    validate_every = 1

    # Creating data indices for training and validation splits:
    # the first `validation_split` share of the (shuffled) indices is held
    # out, and `testing_split` of that held-out part becomes the test set.
    dataset_size = len(dataset)
    indices = list(range(dataset_size))
    split = int(np.floor(validation_split * dataset_size))
    if shuffle_dataset:
        np.random.seed(random_seed)
        np.random.shuffle(indices)
    train_indices, val_test_indices = indices[split:], indices[:split]
    split1 = int(np.floor(testing_split * len(val_test_indices)))
    val_indices, test_indices = val_test_indices[split1:], val_test_indices[:split1]

    # Creating PT data samplers and loaders:
    train_dataset1 = torch.utils.data.Subset(dataset, train_indices)
    valid_dataset1 = torch.utils.data.Subset(dataset, val_indices)
    test_dataset1 = torch.utils.data.Subset(dataset, test_indices)
    # NOTE(review): the dataset holds all nine volumes in memory. With
    # num_workers > 0 on Windows each worker process presumably receives its
    # own pickled copy of the dataset, which may explain slow epochs --
    # confirm by timing against num_workers=0.
    train_loader = torch.utils.data.DataLoader(train_dataset1, batch_size=batch_size,
                                               num_workers=4, shuffle=True, pin_memory=False)
    validation_loader = torch.utils.data.DataLoader(valid_dataset1, batch_size=batch_size,
                                                    num_workers=0, shuffle=True, pin_memory=False)
    # Built for completeness; not consumed anywhere in this function.
    test_loader = torch.utils.data.DataLoader(test_dataset1, batch_size=batch_size,
                                              num_workers=0, shuffle=True, pin_memory=False)

    # Persist the split once, before training, so it is recoverable even if
    # training is interrupted. The path has no placeholders, so no .format().
    torch.save({
        'train_indices': train_indices, 'validation_indices': val_indices, 'test_indices': test_indices
    }, r'C:\Users\drn-4\Desktop\3D\checkdir\train_val_test_indices.pth')

    # Visdom window handed to valEpoch; created with placeholder points.
    win = viz.scatter(
        X=np.array([[1, 2], [3, 4]]),
        opts=dict(
            markersize=10,
            legend=['Samples']
        ),
    )

    # Now, let's start the training process!
    print('Training...')
    for epoch in range(100):
        # Compute a training epoch
        loss = trainEpoch(train_loader, model, criterion, optimizer, epoch, device)

        # Compute a validation epoch
        if epoch % validate_every == 0:
            val_loss = valEpoch(device, validation_loader, model, criterion, epoch, win)
            # Print the best and the current validation value
            best_val = min(val_loss, best_val)
            print('** Validation: %f (best) - %f (current)' % (best_val, val_loss))
            early_stopping(val_loss, model, epoch, optimizer)
            if early_stopping.early_stop:
                print("Early stopping")
                break
if __name__ == "__main__":
    # Script entry point: create the TensorBoard writer and the Visdom client,
    # then run training. Both names are bound at module level, so no `global`
    # statement is needed; trainProcess() reads `viz` as a module global.
    writer = SummaryWriter()
    try:
        DEFAULT_PORT = 8097
        DEFAULT_HOSTNAME = "http://localhost"
        viz = Visdom(port=DEFAULT_PORT, server=DEFAULT_HOSTNAME, base_url='/', username='',
                     password='',
                     use_incoming_socket=True,
                     env='ResNext50_{}'.format(time.strftime("%Y%m%d-%H%M%S")))
        trainProcess()
    finally:
        # Close the writer even when setup or training raises.
        writer.close()