I want to install torchcodec 0.2.1 on Windows.
I checked download.pytorch.org/whl/nightly/torchcodec/
Are all of the available versions non-Windows builds?
Is there one I should use, or could someone build one?
Hey, you’re right, most of the torchcodec wheels currently available are built for Linux, and there doesn’t seem to be an official Windows wheel for version 0.2.1. You could try building it from source if you’re comfortable with that, but it can get tricky with dependencies. Another option is to set up a WSL2 environment and use the Linux wheel there as a workaround. Hopefully, Windows support becomes more consistent in future releases.
I had the same issue while trying to use pyannote/community-1 on Windows;
the AudioDecoder tripped me up the whole time. So I was able to "vibe-code" a working stub for my Python program:
def _install_windows_audio_stub():
    """Register a soundfile-backed stand-in for torchcodec's ``AudioDecoder``.

    Meant for platforms (e.g. Windows) where torchcodec cannot be installed.
    Calling this function:

    * puts a synthetic ``pyannote.audio.pipelines.utils.audio`` module into
      ``sys.modules`` (overwriting any existing entry) whose only attribute
      is the stub ``AudioDecoder``;
    * exposes the same class as ``builtins.AudioDecoder``;
    * prints a one-line confirmation.

    ``torch``, ``numpy``, ``soundfile`` and ``resampy`` must be installed;
    they are imported here so nothing is loaded unless the stub is requested.
    """
    import builtins
    import sys
    import types
    from types import SimpleNamespace

    import numpy as np
    import resampy
    import soundfile as sf
    import torch

    def _resolve_source(source, default=None):
        """Return the audio location held by *source*.

        Mapping-like inputs (anything with a ``get`` method) are looked up
        under ``"audio"`` and then ``"uri"``, falling back to *default*.
        Every other input (``str``, ``pathlib.Path``, ...) is returned
        unchanged and handed to soundfile as-is.
        """
        lookup = getattr(source, "get", None)
        if callable(lookup):
            return lookup("audio", lookup("uri", default))
        return source

    def _load_channels_first(source, target_rate):
        """Decode *source* to a float32 ``(channels, samples)`` array.

        The signal is resampled to *target_rate* when the file's native rate
        differs from it.
        """
        # always_2d keeps mono and multi-channel files on the same code path.
        frames, native_rate = sf.read(source, dtype="float32", always_2d=True)
        waveform = frames.T  # soundfile yields (samples, channels)
        if native_rate != target_rate:
            # Resample along the time axis (last axis after the transpose),
            # not across channels.
            waveform = resampy.resample(
                waveform, native_rate, target_rate, axis=-1
            )
        return waveform.astype(np.float32, copy=False)

    def _time_slice(waveform, start_time, end_time, rate):
        """Cut ``[start_time, end_time)`` seconds out of a channels-first array."""
        # Clamp so negative timestamps cannot wrap around to the end of the
        # signal through Python's negative indexing.
        first = max(0, int(start_time * rate))
        last = max(first, int(end_time * rate))
        return waveform[:, first:last]

    def _to_tensor(waveform):
        """Wrap a NumPy array as a torch tensor backed by contiguous memory."""
        return torch.from_numpy(np.ascontiguousarray(waveform))

    class AudioSamples:
        """Mimic torchcodec.AudioSamples"""

        def __init__(self, data, sample_rate):
            self.data = data  # torch.Tensor [channels, samples]
            self.sample_rate = sample_rate

    class AudioDecoder:
        """TorchCodec-less audio decoder stub for Windows (full API)."""

        def __init__(self, path_or_file, *_, **__):
            """Open *path_or_file* and cache its header metadata.

            *path_or_file* may be a path (``str`` or ``pathlib.Path``) or a
            mapping with an ``"audio"`` or ``"uri"`` entry. Extra positional
            and keyword arguments are accepted and ignored.
            """
            self.path = _resolve_source(path_or_file)
            info = sf.info(self.path)
            # Guard against a zero sample rate in a broken header.
            dur = info.frames / info.samplerate if info.samplerate > 0 else 0.0
            self.metadata = SimpleNamespace(
                sample_rate=info.samplerate,
                num_channels=info.channels,
                num_frames=info.frames,
                dtype="float32",
                duration_seconds_from_header=dur,
            )
            self.sample_rate = info.samplerate

        # ---------- core audio access ----------
        def get_all_samples(self):
            """Return full audio as AudioSamples."""
            waveform = _load_channels_first(self.path, self.sample_rate)
            return AudioSamples(_to_tensor(waveform), self.sample_rate)

        def get_samples_played_in_range(self, start_time: float, end_time: float):
            """Return samples between timestamps (in seconds)."""
            waveform = _load_channels_first(self.path, self.sample_rate)
            window = _time_slice(
                waveform, start_time, end_time, self.sample_rate
            )
            return AudioSamples(_to_tensor(window), self.sample_rate)

        def crop(self, file, segment=None):
            """Crop segment or return full waveform dict.

            *file* is a path or a mapping with an ``"audio"`` or ``"uri"``
            entry; *segment*, when given, must expose ``start`` and ``end``
            in seconds. The returned ``"waveform"`` tensor is shaped
            ``(channels, samples)`` at this decoder's sample rate.
            """
            source = _resolve_source(file, default=file)
            waveform = _load_channels_first(source, self.sample_rate)
            if segment is not None:
                waveform = _time_slice(
                    waveform, segment.start, segment.end, self.sample_rate
                )
            return {
                "waveform": _to_tensor(waveform),
                "sample_rate": self.sample_rate,
            }

    # ---------- registration ----------
    mod = types.ModuleType("pyannote.audio.pipelines.utils.audio")
    mod.AudioDecoder = AudioDecoder
    sys.modules["pyannote.audio.pipelines.utils.audio"] = mod
    builtins.AudioDecoder = AudioDecoder
    print("⚙️ Windows AudioDecoder stub installed (full torchcodec API mimic)")
Maybe it can help someone.