Skip to content

Memory leak #2719

@rztz

Description

@rztz

Describe the bug
The CudaJit Backend leaks memory when creating and dropping Tensors.

To Reproduce

/// Reproduction for burn issue #2719: creating and dropping tensors in a
/// loop leaks GPU memory on the CudaJit backend, while Candle and LibTorch
/// release it correctly under the identical workload.
fn test_memory_leak() {
    // SAFETY: loading libtorch has no load-time invariants we rely on here;
    // the library handle is intentionally leaked for the process lifetime so
    // the LibTorch backend below can resolve its symbols.
    unsafe { libloading::Library::new("libtorch.so").unwrap() };

    // number of loops to overflow GPU memory is dependent on your hardware
    let loops = 10_000;

    // this works and memory does not grow
    test_device_memory_leak::<burn::backend::Candle>(
        &burn::backend::candle::CandleDevice::cuda(0),
        loops,
    );

    // this works and memory does not grow
    test_device_memory_leak::<burn::backend::LibTorch>(
        &burn::backend::libtorch::LibTorchDevice::Cuda(0),
        loops,
    );

    // this overflows my 10 gig memory at about loop 2600
    test_device_memory_leak::<burn::backend::CudaJit>(
        &burn::backend::cuda_jit::CudaDevice::new(0),
        loops,
    );
}

/// Repeatedly builds a 4-D tensor on `device` from the same host buffer and
/// immediately drops it, `loops` times, printing progress every 100
/// iterations. Device memory should stay flat if the backend frees tensors
/// on drop; growth indicates a leak.
fn test_device_memory_leak<B: Backend>(device: &<B as Backend>::Device, loops: usize) {
    const DIM: usize = 4;
    let shape = [32, 4, 84, 84];
    // Total element count for the host-side zero-filled buffer.
    let element_count = shape.iter().product();
    let host_buffer = vec![0; element_count];

    for iteration in 0..loops {
        // Progress marker so the failing iteration is visible before OOM.
        if iteration % 100 == 0 {
            println!("i: {}", iteration);
        }
        let tensor: Tensor<B, DIM> =
            Tensor::from_data(TensorData::new(host_buffer.clone(), shape), device);
        // Explicit drop documents that the tensor's memory should be
        // released here, at the end of each iteration.
        drop(tensor);
    }
}

Expected behavior
drop(t) should free the memory

Screenshots

Desktop (please complete the following information):
Fedora 41, NVIDIA CUDA 12.6

Metadata

Metadata

Assignees

No one assigned

    Labels

    bug — Something isn't working

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions