I am running a script (script 1) that holds a few CUDA tensor variables, and it executes fine. Now I have a use case that involves running a second script (script 2) that needs to access those CUDA tensors from script 1. I think I've figured it out, but I am getting some errors/warnings when I run script 2.
Script 1:
import io
import time
from multiprocessing import shared_memory
import torch
def share_tensor(tensor, name=None):
    s = time.perf_counter()
    # Serialize the tensor to bytes, then copy those bytes into a named
    # shared memory block that other processes can attach to.
    with io.BytesIO() as buffer:
        torch.save(tensor, buffer)
        tensor_bytes = buffer.getvalue()
    size = len(tensor_bytes)
    shm = shared_memory.SharedMemory(create=True, size=size, name=name)
    shm.buf[:size] = tensor_bytes
    print(time.perf_counter() - s)
    print(size)
tensor_1 = torch.randn(100, device="cuda")
share_tensor(tensor_1, "tensor_1")
tensor_2 = torch.randn(100, device="cuda")
share_tensor(tensor_2, "tensor_2")
tensor_3 = torch.randn(100, device="cuda")
share_tensor(tensor_3, "tensor_3")
time.sleep(1000)
Script 2:
import io
import sys
import time
from multiprocessing import shared_memory
import torch
size = int(sys.argv[1]) if len(sys.argv) > 1 else 1564
def load_shared_tensor(name, size):
    try:
        start = time.perf_counter()
        # Attach to the block created by script 1 and deserialize the
        # tensor back onto the GPU.
        shm = shared_memory.SharedMemory(name=name)
        tensor_bytes = shm.buf[:size]
        with io.BytesIO(tensor_bytes) as buffer:
            tensor = torch.load(buffer, map_location="cuda")
        print(f"Loaded tensor from {name} in {time.perf_counter() - start:.4f} seconds")
        return tensor
    except Exception as e:
        print(f"Failed to read `{name}`: {e}")
# Load tensors from shared memory segments
tensor_1 = load_shared_tensor("tensor_1", size)
tensor_2 = load_shared_tensor("tensor_2", size)
tensor_3 = load_shared_tensor("tensor_3", size)
My output with warnings/error:
Loaded tensor from tensor_1 in 0.1899 seconds
Exception ignored in: <function SharedMemory.__del__ at 0x7d28031e0700>
Traceback (most recent call last):
  File "/usr/lib/python3.9/multiprocessing/shared_memory.py", line 184, in __del__
    self.close()
  File "/usr/lib/python3.9/multiprocessing/shared_memory.py", line 227, in close
    self._mmap.close()
BufferError: cannot close exported pointers exist
Loaded tensor from tensor_2 in 0.0004 seconds
Exception ignored in: <function SharedMemory.__del__ at 0x7d28031e0700>
Traceback (most recent call last):
  File "/usr/lib/python3.9/multiprocessing/shared_memory.py", line 184, in __del__
    self.close()
  File "/usr/lib/python3.9/multiprocessing/shared_memory.py", line 227, in close
    self._mmap.close()
BufferError: cannot close exported pointers exist
Loaded tensor from tensor_3 in 0.0004 seconds
Exception ignored in: <function SharedMemory.__del__ at 0x7d28031e0700>
Traceback (most recent call last):
  File "/usr/lib/python3.9/multiprocessing/shared_memory.py", line 184, in __del__
    self.close()
  File "/usr/lib/python3.9/multiprocessing/shared_memory.py", line 227, in close
    self._mmap.close()
BufferError: cannot close exported pointers exist
/usr/lib/python3.9/multiprocessing/resource_tracker.py:216: UserWarning: resource_tracker: There appear to be 3 leaked shared_memory objects to clean up at shutdown
  warnings.warn('resource_tracker: There appear to be %d '
I am able to share the tensors, but I couldn't figure out how to avoid these warnings/errors, or how to suppress the messages. Any suggestions? Is there a better way in PyTorch to achieve the same thing?
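For what it's worth, my current guess is that the BufferError happens because the memoryview slice taken from shm.buf is still alive when the SharedMemory object is garbage-collected, so its __del__ cannot close the underlying mmap. Below is a sketch of what I am considering for script 2 instead (copying the bytes out, releasing the view explicitly, and closing the handle myself); I have not confirmed this is the idiomatic approach:

def load_shared_tensor(name, size):
    start = time.perf_counter()
    # Attach to the block that script 1 created.
    shm = shared_memory.SharedMemory(name=name)
    try:
        view = shm.buf[:size]
        try:
            # Copy the serialized bytes out of shared memory so that no
            # memoryview into shm.buf outlives the handle.
            tensor_bytes = bytes(view)
        finally:
            view.release()
    finally:
        # Close only this process's handle; script 1 still owns the block,
        # so shm.unlink() is deliberately not called here.
        shm.close()
    with io.BytesIO(tensor_bytes) as buffer:
        tensor = torch.load(buffer, map_location="cuda")
    print(f"Loaded tensor from {name} in {time.perf_counter() - start:.4f} seconds")
    return tensor

I expect that to address the BufferError, but the resource_tracker warning looks like a separate issue: as far as I can tell, on Python 3.9 the tracker registers a segment even when a process merely attaches to it, so script 2 can still report the blocks as leaked at shutdown unless they are unlinked. I would rather not reach into multiprocessing.resource_tracker.unregister directly if there is a cleaner way.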
Thanks for your time.