Hi,
I’m new to Pytorch and deep learning in general. I’m developing a bacterial cell segmentation tool for microscopy with Pytorch/Unet. Since bacterial cells are very small (~1 micron wide x 3 microns long), they are only 20 or so pixels wide and I can’t simply load my images (1460 x 1936 pixels) and scale them down without losing critical information. Instead, we’ve been subsampling regions small enough to run on a GPU in batches of 16 or so (160 x 160 pixels) and stitching the model predictions together at the end to get the full image mask. I’m wondering if there is a better/more established method for this kind of subsampling.
Any help would be greatly appreciated.
Here’s some example code:
# Import raw image/mask pairs and resize any half-resolution frames up to the
# common (1460, 1936) canvas so every slice of the stack has the same shape.
if len(training_data) == len(training_mask):  # raw and label lists must pair up
    train_set_size = len(training_data)
    X_data = np.zeros([train_set_size, 1460, 1936])
    Y_data = np.zeros([train_set_size, 1460, 1936])
    for n, (X_file, y_file) in enumerate(zip(training_data, training_mask)):
        # Load raw images (.tif / .tiff).
        # BUG FIX: the original tested the undefined name `X` for the second
        # suffix (NameError) and compared 4 trailing chars against 'tiff'
        # (missing dot); str.endswith with a tuple handles both extensions.
        if X_file.endswith(('.tif', '.tiff')):
            X_image_load = np.array(Image.open(join(folder_path, X_file)))
            if X_image_load.shape == (728, 968):
                # Half-resolution frame: upscale. PIL resize takes (width, height).
                X_image_for_stack = Image.fromarray(X_image_load).resize((1936, 1460))
            elif X_image_load.shape == (1460, 1936):
                X_image_for_stack = X_image_load
            else:
                # BUG FIX: the original fell through and stacked the PREVIOUS
                # image (or raised NameError on the first iteration); skip the
                # pair instead, leaving this slice as zeros.
                print(X_file + ' has wrong size')  # BUG FIX: missing space
                continue
            X_data[n, :, :] = np.array(X_image_for_stack)  # stack raw images
            # Labels (binary masks): bring every known shape to (1460, 1936).
            y_image_load = np.array(Image.open(join(folder_path, y_file)))
            if y_image_load.shape == (1460, 1936):
                y_image_for_stack = y_image_load
            elif y_image_load.shape in ((728, 968), (1456, 1936)):
                y_image_for_stack = Image.fromarray(y_image_load).resize((1936, 1460))
            else:
                print(y_file + ' has wrong size')
                continue
            Y_data[n, :, :] = np.array(y_image_for_stack)  # stack label masks
# Random patch-sampling parameters: draw enough square patches that each
# pixel is covered `Coverage` times on average.
X = []
Y = []
Coverage = 10          # mean number of patches covering each pixel
image_width = 1936
image_height = 1460
sample_px = 160        # square patch edge length, in pixels
half_px = sample_px // 2
n_samples = int(Coverage * image_width * image_height / (sample_px ** 2))
# Pick random patch centres (kept at least half a patch away from every
# border), which stack slice each patch comes from, and per-patch
# augmentation flags.
H = np.random.randint(half_px + 1, image_height - half_px, n_samples)  # row centres
W = np.random.randint(half_px + 1, image_width - half_px, n_samples)   # column centres
Stack_idx = np.random.randint(0, train_set_size, n_samples)  # which image to sample
Transp = np.random.randint(0, 2, n_samples)  # 1 -> transpose the patch
Blur = np.random.randint(0, 2, n_samples)    # 1 -> candidate for Gaussian blur
# Extract n_samples random 160 x 160 patches (with optional transpose/blur
# augmentation) from the image stack and build the training TensorDataset.
half = sample_px // 2  # integer half-width for slicing
for i in range(n_samples):
    # BUG FIX: the original sliced the width axis with `W[i] - sample_px/2`,
    # a float, which raises TypeError in Python 3; use the integer half-width
    # on both axes.
    tmp_X = X_data[Stack_idx[i],
                   H[i] - half:H[i] + half,
                   W[i] - half:W[i] + half]
    # Min-max normalize to [0, 1].
    # BUG FIX: the original divided by max (not max - min), so the result was
    # only in [0, 1] when the patch minimum happened to be 0; also guard a
    # constant patch against division by zero.
    lo, hi = np.min(tmp_X), np.max(tmp_X)
    tmp_X = (tmp_X - lo) / (hi - lo) if hi > lo else np.zeros_like(tmp_X)
    if Transp[i]:
        tmp_X = tmp_X.T  # transpose augmentation
    if Blur[i] and i > 5000 and i % 20 == 1:
        # Occasional blur, only later in the sample stream.
        tmp_X = gaussian_filter(tmp_X, np.random.randint(2, 5))
    X.append(tmp_X)
    tmp_Y = Y_data[Stack_idx[i],
                   H[i] - half:H[i] + half,
                   W[i] - half:W[i] + half]
    tmp_Y = tmp_Y / 255  # masks stored as 0/255 -> 0/1
    if Transp[i]:
        tmp_Y = tmp_Y.T  # keep mask aligned with the transposed image
    Y.append(tmp_Y)
    # NOTE: the original had a dead `i += 1` here; a for-loop rebinds `i`
    # each iteration, so the increment was a no-op and has been removed.
# Stack with NumPy first: torch.Tensor on a list of ndarrays is much slower
# than converting one contiguous array, and yields the same float32 tensor.
x_train_tensor = torch.Tensor(np.asarray(X))
y_train_tensor = torch.Tensor(np.asarray(Y))
dataset = TensorDataset(x_train_tensor, y_train_tensor)