Hi,
I’ve noticed some strange behaviour when I use max pooling in this model on the GPU.
I build a batch of identical samples. Each sample has temporal features that are processed with 2D convolutions, so I fold everything into the batch dimension.
With max pooling enabled, the outputs differ across the (identical) batch elements; without it, they are exactly the same.
This only happens on the GPU, not on the CPU.
It happens on a Titan GTX, a 1080 Ti and a Quadro P6000, across two different computers and setups.
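For reference, here is a much smaller sketch of the same check, reduced to a single conv + max-pool block (the layer sizes are just copied from the first block of the model and the batch of 16 identical copies is illustrative; I have not verified that this reduced version reproduces the effect on every GPU/cuDNN combination):

import torch
from torch import nn

torch.manual_seed(0)
# Roughly the first block of the model below; the exact sizes are illustrative.
net = nn.Sequential(
    nn.Conv2d(1, 32, kernel_size=(7, 7), padding=2, stride=(2, 1)),
    nn.ReLU(),
    nn.MaxPool2d((3, 3)),
).cuda().eval()

x = torch.rand(1, 1, 257, 35, device='cuda')
batch = x.repeat(16, 1, 1, 1)  # 16 identical copies of the same sample

with torch.no_grad():
    y_batch = net(batch)
    y_single = net(x)

# Each batch element should match the single-sample result exactly if the
# same kernels and reduction order are used for both shapes.
for i in range(batch.shape[0]):
    diff = (y_batch[i] - y_single[0]).abs().max().item()
    print(i, torch.allclose(y_batch[i], y_single[0]), diff)

The full script and its output follow.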
import sys
import subprocess

import torch
from torch import nn
from torchaudio.transforms import MelSpectrogram, Spectrogram

N_FFT = 512
N_MELS = 256
HOP_LENGTH = 130
AUDIO_FRAMERATE = 16000
def get_sys_info():
    """Return a formatted string with Python, PyTorch, CUDA, cuDNN and GPU information."""
    result = subprocess.Popen(["nvidia-smi", "--format=csv",
                               "--query-gpu=index,name,driver_version,memory.total,memory.used,memory.free"],
                              stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    nvidia = result.stdout.readlines().copy()
    nvidia = [str(x) for x in nvidia]
    nvidia = [x[2:-3] + '\r\t' for x in nvidia]
    acum = ''
    for x in nvidia:
        acum = acum + x
    return (' Python VERSION: {0} \n\t'
            ' pyTorch VERSION: {1} \n\t'
            ' CUDA VERSION: {2}\n\t'
            ' CUDNN VERSION: {3} \n\t'
            ' Number CUDA Devices: {4} \n\t'
            ' Devices: {5}\n\t'
            'Active CUDA Device: GPU {6} \n\t'
            'Available devices {7} \n\t'
            'Current cuda device {8} \n\t'.format(sys.version, torch.__version__, torch.version.cuda,
                                                  torch.backends.cudnn.version(), torch.cuda.device_count(),
                                                  acum, torch.cuda.current_device(), torch.cuda.device_count(),
                                                  torch.cuda.current_device()))
def make_audio_block(filter_in, filters_out, lrn, max_pool=None, padding=0, stride=1, kernel_size=(3, 3)):
    # Note: `lrn` is currently unused.
    layers = [nn.Conv2d(filter_in, filters_out, kernel_size=kernel_size, padding=padding, stride=stride)]
    layers.append(nn.ReLU(False))
    if max_pool is not None:
        layers.append(nn.MaxPool2d(max_pool))
    return nn.Sequential(*layers)


def reshape(x, unrolled_shape):
    return x.view(*unrolled_shape, *x.shape[1:])


def check(x, unrolled_shape):
    """Compare every unrolled batch element against the first one."""
    x_unrolled = reshape(x, unrolled_shape)
    ref = x_unrolled[0]
    all_equal = True
    error_abs = [None for _ in range(unrolled_shape[0])]
    error_mean = [None for _ in range(unrolled_shape[0])]
    error_abs[0] = 0
    error_mean[0] = 0
    for i in range(1, unrolled_shape[0]):
        all_equal = all_equal and torch.allclose(ref, x_unrolled[i])
        diff = torch.abs(ref - x_unrolled[i])
        diff = diff[diff > 0]
        error_abs[i] = diff.sum()
        error_mean[i] = diff.mean()
    return all_equal, max(error_abs), max(error_mean)
class AudioEncoder(nn.Module):
    # 'filter_size': [96, 256, 512, 512*6*6]
    def __init__(self, pooling, pooling_type='AvgPool'):
        super(AudioEncoder, self).__init__()
        assert pooling_type in ['MaxPool', 'AvgPool'], f'Pooling of type {pooling_type} should be MaxPool or AvgPool'
        filters = [1, 32, 64, 128]
        # self.preproc = MelSpectrogram(sample_rate=AUDIO_FRAMERATE, n_fft=N_FFT, hop_length=HOP_LENGTH, n_mels=N_MELS)
        self.preproc = Spectrogram(n_fft=N_FFT, hop_length=HOP_LENGTH)
        self.b1 = make_audio_block(filters[0], filters[1], lrn=False, max_pool=pooling, padding=2, kernel_size=(7, 7),
                                   stride=(2, 1))
        self.b2 = make_audio_block(filters[1], filters[2], lrn=False, max_pool=pooling, padding=0, kernel_size=(7, 3))
        self.b3 = make_audio_block(filters[2], filters[3], lrn=False, padding=0, kernel_size=(7, 3))
        if pooling_type == 'MaxPool':
            self.pooling = nn.AdaptiveMaxPool2d((1, None))
        else:
            self.pooling = nn.AdaptiveAvgPool2d((1, None))

    def forward(self, x):
        verbose = True
        unrolled_shape = x.shape[:2]
        print(f'Unrolled shape: {unrolled_shape}')
        if verbose:
            print(f'Input--> Shape: {x.shape}, device:{x.device}')
        x = self.preproc(x)
        if verbose:
            print(f'FFT --> Shape: {x.shape}')
        # Fold the (batch, time) dimensions into a single batch dimension.
        x = x.view(-1, 1, *x.shape[2:])
        if verbose:
            equal, abs_max, mean_max = check(x, unrolled_shape)
            print(f'view --> Shape: {x.shape}, all equal: {equal}, max error: abs {abs_max},mean {mean_max}')
        x = self.b1(x)
        if verbose:
            equal, abs_max, mean_max = check(x, unrolled_shape)
            print(f'b1 --> Shape: {x.shape}, all equal: {equal}, max error: abs {abs_max},mean {mean_max}')
        x = self.b2(x)
        if verbose:
            equal, abs_max, mean_max = check(x, unrolled_shape)
            print(f'b2 --> Shape: {x.shape}, all equal: {equal}, max error: abs {abs_max},mean {mean_max}')
        x = self.b3(x)
        if verbose:
            equal, abs_max, mean_max = check(x, unrolled_shape)
            print(f'b3 --> Shape: {x.shape}, all equal: {equal}, max error: abs {abs_max},mean {mean_max}')
        x = self.pooling(x)
        if verbose:
            equal, abs_max, mean_max = check(x, unrolled_shape)
            print(f'pooling --> Shape: {x.shape}, all equal: {equal}, max error: abs {abs_max},mean {mean_max}')
        x = x.squeeze()
        return x, x.shape
BATCH_SIZE = 16


@torch.no_grad()
def run_test(pooling, pooling_type, device):
    device = torch.device(device)
    model = AudioEncoder(pooling, pooling_type).to(device)
    # One sample of 25 time steps, duplicated BATCH_SIZE times so that every
    # batch element is identical.
    inp_element = torch.rand(25, 4480).to(device)
    inp = torch.stack([inp_element.clone() for _ in range(BATCH_SIZE)])
    # print(model)
    y, shape = model(inp)
    is_identical, max_abs, max_mean = check(y, [BATCH_SIZE, 25])
    if is_identical:
        print(f"Test: Pooling {pooling}, {pooling_type}. Device {device}, max_abs {max_abs},max_mean {max_mean} OK")
    else:
        print(
            f"Test: Pooling {pooling}, {pooling_type}. Device {device}, max_abs {max_abs},max_mean {max_mean}. Failed")
    print('---------------------------------------')


pooling_tests = [None, (3, 3)]
pooling_types = ['AvgPool']
devices = ['cuda:0', 'cuda:1', 'cpu']

if __name__ == '__main__':
    print(get_sys_info())
    for device in devices:
        for pooling_i in pooling_tests:
            for pooling_type_i in pooling_types:
                run_test(pooling_i, pooling_type_i, device)
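Here is the output: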
Python VERSION: 3.6.9 (default, Oct 8 2020, 12:12:24)
[GCC 8.4.0]
pyTorch VERSION: 1.7.0+cu110
CUDA VERSION: 11.0
CUDNN VERSION: 8004
Number CUDA Devices: 3
Active CUDA Device: GPU 0
Available devices 3
Current cuda device 0
Unrolled shape: torch.Size([16, 25])
Input--> Shape: torch.Size([16, 25, 4480]), device:cuda:0
/home/jfm/.local/lib/python3.6/site-packages/torch/functional.py:516: UserWarning: stft will require the return_complex parameter be explicitly specified in a future PyTorch release. Use return_complex=False to preserve the current behavior or return_complex=True to return a complex output. (Triggered internally at /pytorch/aten/src/ATen/native/SpectralOps.cpp:653.)
normalized, onesided, return_complex)
/home/jfm/.local/lib/python3.6/site-packages/torch/functional.py:516: UserWarning: The function torch.rfft is deprecated and will be removed in a future PyTorch release. Use the new torch.fft module functions, instead, by importing torch.fft and calling torch.fft.fft or torch.fft.rfft. (Triggered internally at /pytorch/aten/src/ATen/native/SpectralOps.cpp:590.)
normalized, onesided, return_complex)
FFT --> Shape: torch.Size([16, 25, 257, 35])
view --> Shape: torch.Size([400, 1, 257, 35]), all equal: True, max error: abs 0,mean 0
b1 --> Shape: torch.Size([400, 32, 128, 33]), all equal: True, max error: abs 0,mean 0
b2 --> Shape: torch.Size([400, 64, 122, 31]), all equal: True, max error: abs 0,mean 0
b3 --> Shape: torch.Size([400, 128, 116, 29]), all equal: True, max error: abs 0,mean 0
pooling --> Shape: torch.Size([400, 128, 1, 29]), all equal: True, max error: abs 0,mean 0
Test: Pooling None, AvgPool. Device cuda:0, max_abs 0,max_mean 0 OK
---------------------------------------
Unrolled shape: torch.Size([16, 25])
Input--> Shape: torch.Size([16, 25, 4480]), device:cuda:0
FFT --> Shape: torch.Size([16, 25, 257, 35])
view --> Shape: torch.Size([400, 1, 257, 35]), all equal: True, max error: abs 0,mean 0
b1 --> Shape: torch.Size([400, 32, 42, 11]), all equal: True, max error: abs 0,mean 0
b2 --> Shape: torch.Size([400, 64, 12, 3]), all equal: True, max error: abs 0,mean 0
b3 --> Shape: torch.Size([400, 128, 6, 1]), all equal: False, max error: abs 0.030087150633335114,mean 3.579247049856349e-06
pooling --> Shape: torch.Size([400, 128, 1, 1]), all equal: False, max error: abs 0.0028738644905388355,mean 1.6516462437721202e-06
Test: Pooling (3, 3), AvgPool. Device cuda:0, max_abs 0.0028738644905388355,max_mean 1.6516462437721202e-06. Failed
---------------------------------------
Unrolled shape: torch.Size([16, 25])
Input--> Shape: torch.Size([16, 25, 4480]), device:cuda:1
FFT --> Shape: torch.Size([16, 25, 257, 35])
view --> Shape: torch.Size([400, 1, 257, 35]), all equal: True, max error: abs 0,mean 0
b1 --> Shape: torch.Size([400, 32, 128, 33]), all equal: True, max error: abs 0,mean 0
b2 --> Shape: torch.Size([400, 64, 122, 31]), all equal: True, max error: abs 0,mean 0
b3 --> Shape: torch.Size([400, 128, 116, 29]), all equal: True, max error: abs 0,mean 0
pooling --> Shape: torch.Size([400, 128, 1, 29]), all equal: True, max error: abs 0,mean 0
Test: Pooling None, AvgPool. Device cuda:1, max_abs 0,max_mean 0 OK
---------------------------------------
Unrolled shape: torch.Size([16, 25])
Input--> Shape: torch.Size([16, 25, 4480]), device:cuda:1
FFT --> Shape: torch.Size([16, 25, 257, 35])
view --> Shape: torch.Size([400, 1, 257, 35]), all equal: True, max error: abs 0,mean 0
b1 --> Shape: torch.Size([400, 32, 42, 11]), all equal: True, max error: abs 0,mean 0
b2 --> Shape: torch.Size([400, 64, 12, 3]), all equal: True, max error: abs 0,mean 0
b3 --> Shape: torch.Size([400, 128, 6, 1]), all equal: False, max error: abs 0.03450433909893036,mean 4.106193046027329e-06
pooling --> Shape: torch.Size([400, 128, 1, 1]), all equal: False, max error: abs 0.0032321936450898647,mean 1.921637021951028e-06
Test: Pooling (3, 3), AvgPool. Device cuda:1, max_abs 0.0032321936450898647,max_mean 1.921637021951028e-06. Failed
---------------------------------------
Unrolled shape: torch.Size([16, 25])
Input--> Shape: torch.Size([16, 25, 4480]), device:cpu
FFT --> Shape: torch.Size([16, 25, 257, 35])
view --> Shape: torch.Size([400, 1, 257, 35]), all equal: True, max error: abs 0,mean 0
b1 --> Shape: torch.Size([400, 32, 128, 33]), all equal: True, max error: abs 0,mean 0
b2 --> Shape: torch.Size([400, 64, 122, 31]), all equal: True, max error: abs 0,mean 0
b3 --> Shape: torch.Size([400, 128, 116, 29]), all equal: True, max error: abs 0,mean 0
pooling --> Shape: torch.Size([400, 128, 1, 29]), all equal: True, max error: abs 0,mean 0
Test: Pooling None, AvgPool. Device cpu, max_abs 0,max_mean 0 OK
---------------------------------------
Unrolled shape: torch.Size([16, 25])
Input--> Shape: torch.Size([16, 25, 4480]), device:cpu
FFT --> Shape: torch.Size([16, 25, 257, 35])
view --> Shape: torch.Size([400, 1, 257, 35]), all equal: True, max error: abs 0,mean 0
b1 --> Shape: torch.Size([400, 32, 42, 11]), all equal: True, max error: abs 0,mean 0
b2 --> Shape: torch.Size([400, 64, 12, 3]), all equal: True, max error: abs 0,mean 0
b3 --> Shape: torch.Size([400, 128, 6, 1]), all equal: True, max error: abs 0,mean 0
pooling --> Shape: torch.Size([400, 128, 1, 1]), all equal: True, max error: abs 0,mean 0
Test: Pooling (3, 3), AvgPool. Device cpu, max_abs 0,max_mean 0 OK
---------------------------------------
Process finished with exit code 0
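In case cuDNN's kernel selection for the pooled shapes is involved (just a guess on my side, not something I have verified), a minimal sketch of the flags that could be set at the top of the script, before building the model, to check whether restricting cuDNN changes the result:

import torch

# Disable cuDNN auto-tuning and restrict it to deterministic kernels.
# Both flags exist in all recent PyTorch releases.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

I have not checked whether this makes the batch elements identical again.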