How to divide the data into chunks

import torch.utils.data as data
import torchvision.transforms as transforms
import torchvision.datasets as datasets

ROOT = '.data'

train_data = datasets.MNIST(root = ROOT,
                            train = True,
                            download = True)

mean = train_data.data.float().mean() / 255
std = train_data.data.float().std() / 255

train_transforms = transforms.Compose([
    transforms.RandomRotation(5, fill=(0,)),
    transforms.RandomCrop(28, padding = 2),
    transforms.ToTensor(),
    transforms.Normalize(mean = [mean], std = [std])
])

test_transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean = [mean], std = [std])
])

VALID_RATIO = 0.9

n_train_examples = int(len(train_data) * VALID_RATIO)
n_valid_examples = len(train_data) - n_train_examples
train_data, valid_data = data.random_split(train_data,
                                           [n_train_examples, n_valid_examples])

I need to divide the train_data into 10 chunks… please support me… I am new to PyTorch.

If you want some folds for cross-validation, a simple thing to do is to shuffle the data once beforehand (perhaps deterministically, by manually setting the seed to a fixed value first).
If you want to split the data into 10 chunks, you can do something like:

data_len = len(train_data)
nchunks = 10
chunksiz = data_len // nchunks          # integer division so the slice bounds are ints
chunks = list()
for i in range(nchunks):
    start_idx = i * chunksiz            # step by the chunk size, not the chunk count
    chunks.append(train_data[start_idx:start_idx + chunksiz])
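
For the deterministic shuffle mentioned above, a minimal sketch (assuming train_data is a plain indexable tensor; for a Dataset you would shuffle an index list instead):

import torch

torch.manual_seed(0)                    # fixed seed, so the permutation is reproducible
perm = torch.randperm(len(train_data))  # random ordering of all sample indices
train_data = train_data[perm]           # shuffle once, then chunk as above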
data_len = len(train_data)
nchunks = 10
chunksiz = int(data_len/nchunks)
chunks = list()
for i in range(nchunks):
    start_idx = i*chunksiz
    chunks.append(train_data[start_idx:start_idx+chunksiz])

I got the following result:

ValueError: only one element tensors can be converted to Python scalars

Can you suggest how to resolve this issue?

Thank you so much for your response…

Can you post the line that causes that issue? I can’t reproduce that on my end with

import torch

train_data = torch.rand(100, 32, 32)
data_len = len(train_data)
nchunks = 10
chunksiz = int(data_len/nchunks)
chunks = list()
for i in range(nchunks):
    start_idx = i*chunksiz  # chunksiz happens to equal nchunks here, so the output is identical
    chunks.append(train_data[start_idx:start_idx+chunksiz])
print([chunk.shape for chunk in chunks])
[torch.Size([10, 32, 32]), torch.Size([10, 32, 32]), torch.Size([10, 32, 32]), torch.Size([10, 32, 32]), torch.Size([10, 32, 32]), torch.Size([10, 32, 32]), torch.Size([10, 32, 32]), torch.Size([10, 32, 32]), torch.Size([10, 32, 32]), torch.Size([10, 32, 32])]

If your train_data and valid_data are imported as Tensors, you can use torch.chunk to split your Tensors into n chunks. The docs for torch.chunk are here → torch.chunk — PyTorch 1.9.0 documentation
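
For instance, a minimal sketch (the random tensor below is just a stand-in for your data; note that torch.chunk may return fewer chunks if the size is not evenly divisible):

import torch

images = torch.rand(60000, 28, 28)       # stand-in for an MNIST-sized image tensor
chunks = torch.chunk(images, 10, dim=0)  # split along the sample dimension
print(len(chunks), chunks[0].shape)      # 10 torch.Size([6000, 28, 28])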

Yeah, that is absolutely working fine. But when I try it with the MNIST dataset, it is not working; it shows the error I mentioned…

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data as data

import torchvision.transforms as transforms
import torchvision.datasets as datasets


ROOT = '.data'

train_data = datasets.MNIST(root = ROOT, 
                            train = True, 
                            download = True)

mean = train_data.data.float().mean() / 255
std = train_data.data.float().std() / 255
train_transforms = transforms.Compose([
                            transforms.RandomRotation(5, fill=(0,)),
                            transforms.RandomCrop(28, padding = 2),
                            transforms.ToTensor(),
                            transforms.Normalize(mean = [mean], std = [std])
                                      ])

test_transforms = transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize(mean = [mean], std = [std])
                                     ])
train_data = datasets.MNIST(root = ROOT, 
                            train = True, 
                            download = True, 
                            transform = train_transforms)

test_data = datasets.MNIST(root = ROOT, 
                           train = False, 
                           download = True, 
                           transform = test_transforms)

VALID_RATIO = 0.9

n_train_examples = int(len(train_data) * VALID_RATIO)
n_valid_examples = len(train_data) - n_train_examples

train_data, valid_data = data.random_split(train_data, 
                                           [n_train_examples, n_valid_examples])

data_len = len(train_data)
nchunks = 10
chunksiz = int(data_len/nchunks)
chunks = list()
for i in range(nchunks):
    start_idx = i*chunksiz
    chunks.append(train_data[start_idx:start_idx+chunksiz])   # <- raises the ValueError here
print([chunk.shape for chunk in chunks])
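
The slicing is what fails here: random_split returns a torch.utils.data.Subset, not a tensor, and slicing it passes a whole list of indices to MNIST's __getitem__ in a single call, which expects one index at a time (its integer conversion of the target is most likely where the multi-element tensor triggers exactly the ValueError above). A sketch of a workaround that builds each chunk as a Subset over a range of indices instead of slicing, so every chunk keeps the transform pipeline and can be passed straight to a DataLoader:

import torch.utils.data as data

data_len = len(train_data)
nchunks = 10
chunksiz = data_len // nchunks

# Each chunk is itself a dataset; samples are still fetched one at a time,
# so the transforms keep working and no slicing of the Subset is needed.
chunks = [data.Subset(train_data, list(range(i * chunksiz, (i + 1) * chunksiz)))
          for i in range(nchunks)]

print([len(chunk) for chunk in chunks])   # [5400, 5400, ..., 5400]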