# example 1)
# Collect the indices 0..2 into a list (same result as the append loop).
list_ = list(range(3))
-----------------------------------------
# example 2)
# Start from a tensor with ZERO rows so the result contains only the data we
# concatenate; torch.empty(2, 3) would contribute two rows of uninitialized
# ("trash") memory that then survive every torch.cat().
x = torch.empty(0, 3)
for i in range(3):
    y = torch.FloatTensor([[1, 2, 3], [4, 5, 6]])
    # Append y's rows to x along dim 0; x grows 0 -> 2 -> 4 -> 6 rows.
    x = torch.cat([x, y], dim=0)
print(x)
> tensor([[-1.9967e-31, 4.5713e-41, 9.6263e-38],
[ 0.0000e+00, -1.6586e-35, 4.5713e-41],
[ 1.0000e+00, 2.0000e+00, 3.0000e+00],
[ 4.0000e+00, 5.0000e+00, 6.0000e+00]])
list_ = [] has no elements, but torch.empty() contains some trash (uninitialized) values.
How can I use torch.cat() without trash values?
If this is impossible, how can I remove the trash values from x?
@albanD Thanks for your answer.
In my case, torch.cat() is much slower than list.append().
So I’m trying to change my code to append to a list instead.
# Accumulate the selected images/labels from every batch; each element may have
# a different first-dimension size, so cat/stack them (or wrap in a custom
# Dataset) afterwards.
new_dataset_x = []
new_dataset_y = []
# enumerate() is required: a DataLoader yields batches, not (index, batch)
# pairs, so `for step, data in train_loader:` would mis-unpack the batch.
for step, data in enumerate(train_loader):
    inputs, labels = data  # inputs.shape == [64, 3, 28, 28] — TODO confirm batch size
    ...
    # Get some imgs from inputs under some conditions
    new_dataset_x.append(ok_imgs)    # ok_imgs.shape == [??, 3, 28, 28]; first dim varies per step
    new_dataset_y.append(ok_labels)  # ok_labels.shape == [??]
As a result, each element of new_dataset_x has a different size in its 1st dimension.
new_dataset_x[0].shape : [54,3,28,28]
new_dataset_x[1].shape : [34,3,28,28]
...
How can I make a DataLoader with this?
Or, if this approach is inefficient, please recommend another way.
One trick is to .append() to a list and then do a single .cat() at the end with the whole list.
The other is to create a custom Dataset that takes this list as input and give that to your DataLoader. Note that doing the .cat() and using a TensorDataset will be faster during training!
I find that stack is also a good alternative (especially with nested lists of tensors or something like that), e.g.
# %%
import torch
# stack vs cat
# cat "extends" a tensor along an EXISTING dimension, e.g. adds more rows or columns
x = torch.randn(2, 3)
print(f'{x.size()}')
# add more rows (the first dimension grows 2 -> 6)
xnew_from_cat = torch.cat((x, x, x), 0)
print(f'{xnew_from_cat.size()}')
# add more columns (the second dimension grows 3 -> 9)
xnew_from_cat = torch.cat((x, x, x), 1)
print(f'{xnew_from_cat.size()}')
print()
# stack serves the same role as append in lists: it doesn't change the original
# tensors but introduces a NEW dimension, so you retain the ability to get each
# original tensor back by indexing along that new dimension
xnew_from_stack = torch.stack((x, x, x, x), 0)
print(f'{xnew_from_stack.size()}')
xnew_from_stack = torch.stack((x, x, x, x), 1)
print(f'{xnew_from_stack.size()}')
xnew_from_stack = torch.stack((x, x, x, x), 2)
print(f'{xnew_from_stack.size()}')
# by default, stack inserts the new dimension at the front (dim=0)
xnew_from_stack = torch.stack((x, x, x, x))
print(f'{xnew_from_stack.size()}')
print('I like to think of xnew_from_stack as a \"tensor list\" that you can pop from the front')
print()
lst = []
print(f'{x.size()}')
for i in range(10):
    # BUG FIX: the original used `x += i` (in-place) and appended `x` itself,
    # so the list held 10 references to ONE tensor and every stacked slice was
    # identical. Out-of-place addition gives each iteration its own tensor.
    x = x + i  # say we do something with x at iteration i
    lst.append(x)
# lstt = torch.stack([x for _ in range(10)])
lstt = torch.stack(lst)
print(lstt.size())
print()
# lst = []
# print(f'{x.size()}')
# for i in range(10):
# x += i # say we do something with x at iteration i
# for j in range(11):
# x += j
# lstx
# lst.append(x)
# # lstt = torch.stack([x for _ in range(10)])
# lstt = torch.stack(lst)
# print(lstt.size())