How to make a really empty tensor?

# example 1)

list_ = []
for i in range(3):
   list_.append(i)
-----------------------------------------

# example 2)

x = torch.empty(2,3)
for i in range(3):
  y = torch.FloatTensor([[1,2,3],[4,5,6]])
  x = torch.cat([x, y], dim=0)
print(x)
> tensor([[-1.9967e-31,  4.5713e-41,  9.6263e-38],
        [ 0.0000e+00, -1.6586e-35,  4.5713e-41],
        [ 1.0000e+00,  2.0000e+00,  3.0000e+00],
        [ 4.0000e+00,  5.0000e+00,  6.0000e+00]])

list_ = [] has no elements, but torch.empty() contains some garbage values.
How can I use torch.cat() without the garbage values?
If this is impossible, how can I remove the garbage values from x?


Hi,

empty() returns a Tensor with uninitialized memory. You can check the doc for more details.

What do you mean by “empty” here? If you want a Tensor with no data in it, you can create a Tensor with 0 size: x = torch.empty(0, 3).
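For instance, a minimal sketch of example 2 above, but starting from a zero-size tensor so no uninitialized values end up in the result (the loop count and values are just illustrative):

import torch

# a tensor with 0 rows and 3 columns holds no data,
# so the concatenation result contains only the appended rows
x = torch.empty(0, 3)
for i in range(3):
    y = torch.FloatTensor([[1, 2, 3], [4, 5, 6]])
    x = torch.cat([x, y], dim=0)
print(x.shape)  # torch.Size([6, 3]), no garbage values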

5 Likes

@albanD Thanks for your answer.
In my case, torch.cat() is much slower than list.append().
So I'm trying to change my code to append to a list instead.

new_dataset_x = []
new_dataset_y = []
for step, data in enumerate(train_loader):
    inputs, labels = data  # inputs.shape == [64, 3, 28, 28]
    ...
    # Get some imgs from inputs under some conditions
    new_dataset_x.append(ok_imgs)    # ok_imgs.shape == [??, 3, 28, 28]; the first dim size differs at every step
    new_dataset_y.append(ok_labels)  # ok_labels.shape == [??]

As a result, each element of new_dataset_x has a different size in its first dimension:
new_dataset_x[0].shape : [54, 3, 28, 28]
new_dataset_x[1].shape : [34, 3, 28, 28]
...
How can I make a DataLoader with this?
Or, if this approach is inefficient, please recommend another way.

  • One trick is to .append() to a list and then do a single .cat() at the end with the whole list.
  • The other is to create a custom Dataset that takes this list as input and give that to your DataLoader. Note that doing the .cat() and using a TensorDataset will be faster during training! (See the sketch below.)
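A minimal sketch of the first option, assuming the new_dataset_x / new_dataset_y lists from the post above (the batch size and shuffle flag are just placeholders):

import torch
from torch.utils.data import TensorDataset, DataLoader

# a single cat over the whole list is much cheaper than cat-ing inside the loop,
# because every torch.cat allocates a new tensor and copies all previous data again
all_x = torch.cat(new_dataset_x, dim=0)  # shape [54 + 34 + ..., 3, 28, 28]
all_y = torch.cat(new_dataset_y, dim=0)  # shape [54 + 34 + ...]

dataset = TensorDataset(all_x, all_y)
new_loader = DataLoader(dataset, batch_size=64, shuffle=True)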

I find that stack is also a good alternative (especially with nested lists of tensors or something like that), e.g.


# %%

import torch

# stack vs cat

# cat "extends" a list in the given dimension e.g. adds more rows or columns

x = torch.randn(2, 3)
print(f'{x.size()}')

# add more rows (cat along dim 0): (2, 3) -> (6, 3)
xnew_from_cat = torch.cat((x, x, x), 0)
print(f'{xnew_from_cat.size()}')

# add more columns (cat along dim 1): (2, 3) -> (2, 9)
xnew_from_cat = torch.cat((x, x, x), 1)
print(f'{xnew_from_cat.size()}')

print()

# stack serves the same role as append in lists, i.e. it doesn't change the original
# tensors but instead adds a new dimension to the result, so you retain the ability
# to get back each original tensor by indexing along the new dimension
xnew_from_stack = torch.stack((x, x, x, x), 0)
print(f'{xnew_from_stack.size()}')

xnew_from_stack = torch.stack((x, x, x, x), 1)
print(f'{xnew_from_stack.size()}')

xnew_from_stack = torch.stack((x, x, x, x), 2)
print(f'{xnew_from_stack.size()}')

# by default, stack adds the new dimension at the front (dim 0)
xnew_from_stack = torch.stack((x, x, x, x))
print(f'{xnew_from_stack.size()}')

print('I like to think of xnew_from_stack as a "tensor list" that you can pop from the front')

print()

lst = []
print(f'{x.size()}')
for i in range(10):
    x += i  # say we do something with x at iteration i
    lst.append(x.clone())  # clone, because x += i is in-place; appending x itself would store 10 aliases of the same tensor
# lstt = torch.stack([x for _ in range(10)])
lstt = torch.stack(lst)
print(lstt.size())

print()


good links: