I’m not sure which code you’re running, but the linked example code works for me:
# Sanity check: extract HuBERT features for several batch sizes and print
# the shape of each transformer layer's output.
import torch  # required for torch.randn; the original snippet omitted this import
import torchaudio

# Load the pretrained HuBERT base pipeline (downloads weights on first use).
bundle = torchaudio.pipelines.HUBERT_BASE
model = bundle.get_model()

for batch_size in (1, 2, 16):
    # 1000 samples of random audio per item in the batch.
    waveform = torch.randn(batch_size, 1000)
    features, _ = model.extract_features(waveform)
    for f in features:
        print(f.shape)
    # Each of the 12 layers prints: torch.Size([batch_size, 2, 768])
    # i.e. the batch dimension is preserved; only the leading dim changes
    # with batch_size (1, 2, 16), the frame count (2) and feature dim (768)
    # stay fixed for a 1000-sample input.
Do you see the same behavior or does it differ?