# Huge memory difference between PyTorch and Tensorflow 1.15

Hi,
I am currently trying to refactor some old TensorFlow code (1.15) into PyTorch (1.7.1). The code takes an image and upscales it using bilinear interpolation. To make sure both functions return the same results, I saved both outputs as .npy files and wrote a script to compare them. (See code snippets below.)
Up to a resize factor of ~5 the results are equal to 4 decimal places; beyond that, numerical differences start to appear.

However, the memory used is VERY different as you can see in this image. Also, PyTorch crashes after a resize scale of 60.

Can anyone tell me why this is happening or how to fix it?

Torch script:

``````python
import torch
import torch.nn.functional as F
import sys
from math import log2
import numpy as np
from PIL import Image

_suffixes = ["bytes", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"]


def file_size(size):
    """Format a byte count as a short string with a binary (1024-based) unit.

    Args:
        size: Number of bytes. Zero, negative and fractional values are
            accepted.

    Returns:
        A string such as ``"1.5 KiB"``: the scaled value printed with up to
        four significant digits, followed by the unit name.
    """
    # Pick the unit from the magnitude so negative sizes do not hit log2's
    # domain error; the sign is preserved by dividing the original value.
    magnitude = abs(size)
    # Each unit step is a factor of 2**10.
    order = int(log2(magnitude) / 10) if magnitude else 0
    # Clamp to the known units: values of 2**90 and above stay in the largest
    # unit instead of indexing past the table, and magnitudes far below one
    # byte do not produce a negative shift.
    order = max(0, min(order, len(_suffixes) - 1))
    return "{:.4g} {}".format(size / (1 << (order * 10)), _suffixes[order])

def main(resizescale=4):
    """Upscale ``test.png`` bilinearly on the GPU, backprop through its sum, and print peak memory.

    The resized tensor is written to ``torch_image.npy``.

    Args:
        resizescale: Factor applied to the fixed 256x256 base resolution to
            get the output size.
    """
    device = torch.device("cuda")
    w = 256
    h = 256
    print(f"Creating image")

    # Move the last axis (presumably channels, as loaded by PIL) to the front
    # and add a leading batch axis.
    np_image = np.array(Image.open("test.png")).transpose((2, 0, 1))[None, ...]

    # requires_grad=True is needed for the backward pass below: without it no
    # tensor in the graph requires grad and backward() raises a RuntimeError.
    image = torch.tensor(
        np_image, device=device, dtype=torch.float32, requires_grad=True
    )

    print(f"Resizing with resize scale: {resizescale}")
    resized = F.interpolate(
        image,
        size=[int(resizescale * h), int(resizescale * w)],
        mode="bilinear",
        align_corners=True,
    )

    print(f"current size: {resized.shape}")

    # Named `total` so the builtin `sum` is not shadowed.
    total = resized.sum()
    total.backward()

    np.save("torch_image.npy", resized.detach().cpu().numpy())

    print(f"current mem: {file_size(torch.cuda.max_memory_allocated(0))}")

if __name__ == "__main__":
    # sys.argv is a list whose first entry is the script name; the resize
    # scale is the first real argument. With no argument, use main()'s default.
    if len(sys.argv) > 1:
        main(float(sys.argv[1]))
    else:
        main()
``````

Tensorflow script:

``````python
import os

os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
import tensorflow as tf
import sys
from math import log2
import numpy as np
from PIL import Image

_suffixes = ["bytes", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"]


def file_size(size):
    """Format a byte count as a short string with a binary (1024-based) unit.

    Args:
        size: Number of bytes. Zero, negative and fractional values are
            accepted.

    Returns:
        A string such as ``"1.5 KiB"``: the scaled value printed with up to
        four significant digits, followed by the unit name.
    """
    # Pick the unit from the magnitude so negative sizes do not hit log2's
    # domain error; the sign is preserved by dividing the original value.
    magnitude = abs(size)
    # Each unit step is a factor of 2**10.
    order = int(log2(magnitude) / 10) if magnitude else 0
    # Clamp to the known units: values of 2**90 and above stay in the largest
    # unit instead of indexing past the table, and magnitudes far below one
    # byte do not produce a negative shift.
    order = max(0, min(order, len(_suffixes) - 1))
    return "{:.4g} {}".format(size / (1 << (order * 10)), _suffixes[order])

def main(resizescale=4):
    """Upscale ``test.png`` bilinearly in a TF1 graph, evaluate the gradient of its sum, and print memory use.

    The resized image is written to ``tf_image.npy``.

    Args:
        resizescale: Factor applied to the fixed 256x256 base resolution to
            get the output size.
    """
    # create an image tensor
    w = 256
    h = 256
    print(f"Creating image")
    # Add a leading batch axis to match the (1, h, w, 3) placeholder below.
    np_image = np.array(Image.open("test.png"))[None, ...]
    image = tf.placeholder(tf.dtypes.float32, shape=(1, h, w, 3))
    resized = [int(resizescale * h), int(resizescale * w)]

    print(f"current size: {resized}")
    size = tf.constant(resized, dtype=tf.int32)
    d = tf.compat.v1.image.resize(
        image, size, method=tf.image.ResizeMethod.BILINEAR, align_corners=True
    )  # upsample w/ BICUBIC -> artifacts

    # Named `total` so the builtin `sum` is not shadowed.
    total = tf.reduce_sum(d)
    # Gradient of the summed output w.r.t. the input image. The original code
    # fetched `g` without defining it. tf.gradients returns one tensor per
    # input, so take the single entry.
    g = tf.gradients(total, image)[0]
    with tf.compat.v1.Session() as sess:
        # The gradient value is fetched only so the backward pass is actually
        # executed; it is not used afterwards.
        cur_image, _ = sess.run([d, g], feed_dict={image: np_image})
        np.save("tf_image.npy", cur_image)
        max_mem = sess.run(tf.contrib.memory_stats.MaxBytesInUse())
        mem = sess.run(tf.contrib.memory_stats.BytesInUse())
        print(f"mem: {file_size(mem)}")
        print(f"max_mem: {file_size(max_mem)}")

if __name__ == "__main__":
    # sys.argv is a list whose first entry is the script name; the resize
    # scale is the first real argument. With no argument, use main()'s default.
    if len(sys.argv) > 1:
        main(float(sys.argv[1]))
    else:
        main()
``````

Compare script:

``````python
import numpy as np

# Load the arrays saved by the PyTorch and TensorFlow scripts. The original
# snippet used both names without ever defining them.
torch_image = np.load("torch_image.npy")
tf_image = np.load("tf_image.npy")

# The PyTorch script moved the channel axis to position 1 before resizing;
# move it back to the end so both arrays share the TensorFlow layout.
torch_image_reshape = torch_image.transpose((0, 2, 3, 1))

# Mean absolute difference, then a hard check to 4 decimal places.
print(np.mean(np.abs(tf_image - torch_image_reshape)))
np.testing.assert_array_almost_equal(tf_image, torch_image_reshape, decimal=4)

``````

In case anyone else comes across this issue: I found out that refactoring the loss code into its own function resolves the problem.

Probably TensorFlow optimizes the graph structure, while the previous PyTorch code still held a reference to the resized image, so PyTorch kept it in memory. This is no longer the case after the refactoring.

After the change, the loss looks more or less the same.

Here is the fixed code:

``````python
import torch
import torch.nn.functional as F
import sys
from math import log2
import numpy as np
from PIL import Image

_suffixes = ["bytes", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"]


def file_size(size):
    """Format a byte count as a short string with a binary (1024-based) unit.

    Args:
        size: Number of bytes. Zero, negative and fractional values are
            accepted.

    Returns:
        A string such as ``"1.5 KiB"``: the scaled value printed with up to
        four significant digits, followed by the unit name.
    """
    # Pick the unit from the magnitude so negative sizes do not hit log2's
    # domain error; the sign is preserved by dividing the original value.
    magnitude = abs(size)
    # Each unit step is a factor of 2**10.
    order = int(log2(magnitude) / 10) if magnitude else 0
    # Clamp to the known units: values of 2**90 and above stay in the largest
    # unit instead of indexing past the table, and magnitudes far below one
    # byte do not produce a negative shift.
    order = max(0, min(order, len(_suffixes) - 1))
    return "{:.4g} {}".format(size / (1 << (order * 10)), _suffixes[order])

def get_loss(image, resizescale=4):
    """Bilinearly resize ``image`` and return the sum of all resized values.

    The output resolution is ``resizescale`` times a fixed 256x256 base. The
    resized tensor stays local to this function; only the scalar sum is
    handed back to the caller.

    Args:
        image: Tensor passed straight to ``F.interpolate``.
        resizescale: Multiplier applied to the 256x256 base resolution.

    Returns:
        The result of ``.sum()`` on the resized tensor.
    """
    base_h = base_w = 256
    print(f"Resizing with resize scale: {resizescale}")
    target = [int(resizescale * base_h), int(resizescale * base_w)]
    resized = F.interpolate(image, size=target, mode="bilinear", align_corners=True)

    print(f"current size: {resized.shape}")

    return resized.sum()

def main(resizescale=4, save=True):
# create an image tensor
device = torch.device("cuda")

print(f"Creating image")
np.random.seed(42)
# np_image = np.random.rand(1, 1, h, w)
np_image = np.array(Image.open("test.png")).transpose((2, 0, 1))[None, ...]
print(f"geting sum")

image = torch.tensor(np_image, device=device, dtype=torch.float32)
sum = get_loss(image, resizescale=resizescale)
sum.backward()
# if save:
#     np.save("torch_image.npy", resized.detach().cpu().numpy())