How to get stream operators in a custom backend

When I use a custom backend, the FX graph that my custom compiler receives does not contain the stream-related operations.

I then read through the tracing process and found that the graph drops those stream operations after aot_module_simplified.

When using aot_module_simplified, how can I make sure that the FX graph passed to the custom compiler still contains the stream-related operations?

Here is my test script:

import torch
import torch.nn as nn
import logging
# Enable DEBUG-level logs for dynamo, aot and inductor, and turn on the
# output_code and graph_code log artifacts.
torch._logging.set_logs(
    dynamo=logging.DEBUG,
    aot=logging.DEBUG,
    inductor=logging.DEBUG,
    output_code=True,
    graph_code=True,
)

class Layer(nn.Module):
    """Module that computes part of its result under a separate CUDA stream.

    ``forward`` evaluates ``x + 1`` inside a ``torch.cuda.stream`` context for
    a newly created stream, evaluates ``x - 1`` outside that context, makes
    the current stream wait on the new stream, and returns the sum of the two
    intermediates.
    """

    def forward(self, x):
        """Return ``(x - 1) + (x + 1)``, with ``x + 1`` issued on a side stream."""
        side_stream = torch.cuda.Stream()
        with torch.cuda.stream(side_stream):
            # Issued while ``side_stream`` is the active stream.
            plus_one = x + 1
        minus_one = x - 1

        # Make the current stream wait on the side stream before the two
        # partial results are combined.
        torch.cuda.current_stream().wait_stream(side_stream)
        return minus_one + plus_one

# Module under test and a 4-element random input moved to the CUDA device.
mm = Layer()
x = torch.randn([4]).cuda()

from torch._functorch.aot_autograd import aot_module_simplified
def toy_backend(gm, sample_inputs):
    """Return ``gm`` unchanged; ``sample_inputs`` is accepted but not used.

    Passed as the ``fw_compiler`` callback to ``aot_module_simplified``.
    """
    return gm
def aot_toy_backend(gm, sample_inputs):
    """Backend for ``torch.compile`` that routes ``gm`` through AOT Autograd.

    Calls ``aot_module_simplified`` with ``toy_backend`` as the forward
    compiler and returns whatever that call produces.
    """
    compiled = aot_module_simplified(
        gm,
        sample_inputs,
        fw_compiler=toy_backend,
    )
    return compiled

# Compile with the AOT-wrapped toy backend (swap in backend="inductor" to
# compare), then call the compiled module on the sample input.
mmc = torch.compile(mm, backend=aot_toy_backend)
yc = mmc(x)