multi processing dataloder wronf behaviors

:bug: Bug

I implement a iterable dataset and use dataloader to generate batch data,
when num worker set above 0, I find after epoch 0 (epoch 1,2,3…) will miss data.
like in epoch 0 has 40 batches but when in epoch 1,2,3 there are only 20 batches (equal to num_worker)
it seems in epoch 1,2,3, each workers onle read the first chunk of a textfilereader provided by pandas.

class pandasDataset(IterableDataset):
    def __init__(self, filelist, D, d):
        self.filelist = filelist
        self.iterlist = []
        self.D = D
        self.d = d
        for filepath in self.filelist:
            data_iter = pd.read_csv(filepath,
                                    iterator=True,
                                    chunksize=CHUNKSIZE,
                                    engine="c",
                                    error_bad_lines=False,
                                    warn_bad_lines=False)
            self.iterlist.append(data_iter)


    def calStepLoc(self, L, D, d):  
        start = 0
        end = 0
        count = 0
        while end != L:
            start = 0 + count * (D + d)
            end = min(start + D, L)
            count = count + 1
            yield start, end

    def calDist(self, data):
        L = data.shape[0]
        D = self.D
        d = self.d
        loc = self.calStepLoc(L, D, d)
        dist = []
        for step, (start, end) in enumerate(loc):

            twdata = data[start:end, :]
            tw = twdata[:, -1].unsqueeze(dim=0).matmul(
                twdata[:, 0:-1]).squeeze(dim=0).float()
            twsum = tw.sum(dim=0).float().unsqueeze(dim=0)
            twdist = tw.div(twsum).unsqueeze(dim=0)  # 每个时间窗口内的数据分布
            dist.append(twdist)
        return torch.cat(dist, dim=0)

    def __iter__(self):
        worker_info = torch.utils.data.get_worker_info()
        if worker_info is None:
            list_start = 0
            list_end = len(self.iterlist) - 1
        else:
            worker_id = worker_info.id
            per_iternums = int(
                math.ceil(len(self.iterlist) / float(worker_info.num_workers)))
            list_start = 0 + worker_id * per_iternums
            list_end = min(list_start + per_iternums, len(self.iterlist))
        for fileind, dataiter in enumerate(self.iterlist[list_start:list_end]):
            for step,data in enumerate(dataiter):
                #                 pad_data, length = self.pad(torch.tensor(np.array(data)))
                data = torch.tensor(np.array(data))
                distdata = self.calDist(data)
                yield distdata
                del data         
train_list = []
test_list = []
for index, filename in enumerate(filelist):
    if index % 5 == 0:
        test_list.append(filename)
    else:
        train_list.append(filename)
train_dataset = pandasDataset(train_list, D, d)
test_dataset = pandasDataset(test_list, D, d)
train_dataloader = DataLoader(train_dataset,
                              batch_size=BATCHSIZE,
                              num_workers=10,
                              pin_memory = True,
                              collate_fn=collate_fn)
test_dataloader = DataLoader(test_dataset,
                             batch_size=BATCHSIZE,
                             num_workers=10,
                             pin_memory = True,
                             collate_fn=collate_fn)

Environment

Collecting environment information...
PyTorch version: 1.10.0
Is debug build: False
CUDA used to build PyTorch: 11.3
ROCM used to build PyTorch: N/A

OS: Ubuntu 20.04.3 LTS (x86_64)
GCC version: (Ubuntu 9.3.0-17ubuntu1~20.04) 9.3.0
Clang version: Could not collect
CMake version: Could not collect
Libc version: glibc-2.31

Python version: 3.8.12 (default, Oct 12 2021, 13:49:34)  [GCC 7.5.0] (64-bit runtime)
Python platform: Linux-5.11.0-38-generic-x86_64-with-glibc2.17
Is CUDA available: True
CUDA runtime version: 10.1.243
GPU models and configuration: GPU 0: Quadro GV100
Nvidia driver version: 470.74
cuDNN version: Could not collect
HIP runtime version: N/A
MIOpen runtime version: N/A

Versions of relevant libraries:
[pip3] numpy==1.19.2
[pip3] numpydoc==1.1.0
[pip3] torch==1.10.0
[pip3] torchaudio==0.10.0
[pip3] torchvision==0.11.1
[conda] _pytorch_select           0.1                       cpu_0    defaults
[conda] blas                      1.0                         mkl    defaults
[conda] cudatoolkit               11.3.1               h2bc3f7f_2    defaults
[conda] ffmpeg                    4.3                  hf484d3e_0    pytorch
[conda] libmklml                  2019.0.5                      0    defaults
[conda] mkl                       2020.2                      256    defaults
[conda] mkl-service               2.3.0            py38he904b0f_0    defaults
[conda] mkl_fft                   1.3.0            py38h54f3939_0    defaults
[conda] mkl_random                1.1.1            py38h0573a6f_0    defaults
[conda] numpy                     1.19.2           py38h54aff64_0    defaults
[conda] numpy-base                1.19.2           py38hfa32c7d_0    defaults
[conda] numpydoc                  1.1.0              pyhd3eb1b0_1    defaults
[conda] pytorch                   1.10.0          py3.8_cuda11.3_cudnn8.2.0_0    pytorch
[conda] pytorch-mutex             1.0                        cuda    pytorch
[conda] torchaudio                0.10.0               py38_cu113    pytorch
[conda] torchvision               0.11.1               py38_cu113    pytorch

Additional context

I don’t know what kind of shape the loaded data has and how the indexing works, so could you please check the intermediate values used in calDist and e.g. calStepLoc and make sure the indexing is correct?