How to align the resample result of librosa and torchaudio

wadewang · September 12, 2024, 2:54am

The speed of torchaudio's resample attracted me after comparing it against librosa, but I found that their results are slightly different.
I have read this documentation: Audio Resampling — Torchaudio 2.4.0 documentation, but it only explains the kaiser resampling method, whereas the default resampling method of librosa is soxr_hq. How should I set the parameters of torchaudio so that its result aligns with that of librosa using soxr_hq?

This is my test code:

import time
import numpy as np
import soundfile as sf
import librosa
print(f"librosa version: {librosa.__version__}")
import scipy.io.wavfile as wavfile
import torchaudio
from pathlib import Path

# Define function to load wav and measure time
def measure_load_time(library_name, load_func, file_path, target_sr=16000):
    """Load an audio file, resample it to ``target_sr`` if needed, and time it.

    Args:
        library_name: Either ``"librosa"`` or ``"torchaudio"``; selects how
            ``load_func`` is called and which resampler is used.
        load_func: Loader returning a ``(data, sample_rate)`` pair. It is
            called as ``load_func(file_path, sr=None)`` for librosa and as
            ``load_func(file_path)`` for torchaudio.
        file_path: Path of the audio file to load.
        target_sr: Sample rate the returned data should have.

    Returns:
        A ``(data, sr, duration)`` tuple: the (possibly resampled) samples,
        their sample rate, and the elapsed time in seconds for loading plus
        resampling (the progress ``print`` calls are inside the timed region).

    Raises:
        ValueError: If ``library_name`` is not one of the supported names.
    """
    # Fail fast with a clear message; otherwise neither branch below would
    # bind ``data``/``sr`` and the ``return`` would raise UnboundLocalError.
    if library_name not in ("librosa", "torchaudio"):
        raise ValueError(
            f"Unsupported library_name: {library_name!r} "
            "(expected 'librosa' or 'torchaudio')"
        )

    # perf_counter is monotonic and high-resolution, so the measured
    # interval cannot be distorted by system clock adjustments.
    start_time = time.perf_counter()
    if library_name == "librosa":
        # sr=None is passed so the explicit resample step below is the one
        # being measured (intent inferred from the sr check that follows).
        data, sr = load_func(file_path, sr=None)
        if sr != target_sr:
            print(f"librosa: Resampling from {sr} to {target_sr}")
            # Uses librosa's default resampling settings (no res_type given).
            data = librosa.resample(data, orig_sr=sr, target_sr=target_sr)
            sr = target_sr
    else:  # "torchaudio"
        data, sr = load_func(file_path)
        if sr != target_sr:
            print(f"torchaudio: Resampling from {sr} to {target_sr}")
            # Uses torchaudio's default resampling settings (no method or
            # filter-width arguments given).
            data = torchaudio.functional.resample(data, orig_freq=sr, new_freq=target_sr)
            sr = target_sr
        # Convert the loaded tensor to a NumPy array and drop size-1 axes.
        data = data.numpy().squeeze()

    duration = time.perf_counter() - start_time
    return data, sr, duration

# Load wav files and compare
# Audio file that both libraries load and resample.
file_path = r"test_task_123.wav"

# Per-library outcome keyed by library name: samples, sample rate, and
# elapsed seconds.
results = {}

# Librosa
data_librosa, sr_librosa, time_librosa = measure_load_time(
    "librosa", librosa.load, file_path
)
results["librosa"] = dict(data=data_librosa, sr=sr_librosa, time=time_librosa)

# Torchaudio
data_torchaudio, sr_torchaudio, time_torchaudio = measure_load_time(
    "torchaudio", torchaudio.load, file_path
)
results["torchaudio"] = dict(
    data=data_torchaudio, sr=sr_torchaudio, time=time_torchaudio
)

# First report: shape, sample rate and leading samples for each library.
print("Comparison of data shapes:")
for lib, result in results.items():
    samples, rate = result["data"], result["sr"]
    print(f"{lib}: shape = {samples.shape}, sample rate = {rate}")
    print(f"{lib} first 10 values: {samples[:10]}")


# Second report: elapsed load-and-resample time for each library.
print("\nComparison of loading times (in seconds):")
for lib in results:
    elapsed = results[lib]["time"]
    print(f"{lib}: {elapsed} seconds")

def compare_data(data1, data2):
    """Return whether two sample arrays have equal shape and near-equal values.

    Args:
        data1: First array-like of samples.
        data2: Second array-like of samples.

    Returns:
        ``True`` if both inputs have the same shape and every pair of
        elements is within an absolute tolerance of ``1e-6`` (NumPy's
        default relative tolerance still applies); ``False`` otherwise,
        including when the shapes differ.
    """
    first = np.asarray(data1)
    second = np.asarray(data2)
    # np.allclose broadcasts its inputs, so mismatched shapes would either
    # raise ValueError (e.g. lengths differing by one sample) or be compared
    # silently after broadcasting (e.g. mono (N,) against stereo (2, N)).
    # Signals of different shape are simply not equal.
    if first.shape != second.shape:
        return False
    return bool(np.allclose(first, second, atol=1e-6))

# Report whether the librosa and torchaudio outputs agree according to
# compare_data.
outputs_match = compare_data(
    results["librosa"]["data"], results["torchaudio"]["data"]
)
print(f"librosa vs torchaudio: {outputs_match}")

wadewang · September 13, 2024, 6:59am

Hello, I tried different values of resampling_method and lowpass_filter_width, and found that the minimal error relative to librosa basically converges to about 2 × 10^(-6).

wengzhenjie · October 31, 2024, 2:32am

I have the same problem. Did you manage to solve it?