JuliaGPU · maleadt · Mar 10, 2025 · Mar 8, 2025 · Mar 9, 2025
diff --git a/src/texture.jl b/src/texture.jl
@@ -97,6 +97,7 @@ end
 
 # idempotency
 CuTextureArray{T,N}(xs::CuTextureArray{T,N}) where {T,N} = xs
+CuTextureArray(xs::CuTextureArray{T,N}) where {T,N} = xs
 
 CuTextureArray(A::AbstractArray{T,N}) where {T,N} = CuTextureArray{T,N}(A)
 

diff --git a/test/base/texture.jl b/test/base/texture.jl
@@ -51,6 +51,8 @@ end
     texarr1D = CuTextureArray(d_a1D)
     tex1D = CuTexture(texarr1D)
     @test fetch_all(tex1D) == d_a1D
+    array_mem = texarr1D.mem
+    @test startswith(sprint(show, array_mem), "$testheight-element ArrayMemory")
 
     texarr2D = CuTextureArray(d_a2D)
     tex2D = CuTexture(texarr2D)
@@ -59,6 +61,14 @@ end
     texarr3D = CuTextureArray(d_a3D)
     tex3D = CuTexture(texarr3D)
     @test fetch_all(tex3D) == d_a3D
+
+    # test the underlying ArrayMemory too
+    array_mem = texarr3D.mem
+    @test_throws ErrorException("Opaque array memory does not have a definite size") sizeof(array_mem)
+    @test size(array_mem) == size(d_a3D)
+    @test length(array_mem) == length(d_a3D)
+    @test ndims(array_mem) == ndims(d_a3D)
+
 end
 
 @testset "CuTextureArray(::Array)" begin
@@ -76,13 +86,20 @@ end
     texarr2D = CuTextureArray(a2D)
     tex2D = CuTexture(texarr2D)
     @test Array(fetch_all(tex2D)) == a2D
+    texarr2D_2 = CuTextureArray(texarr2D)
+    tex2D_2 = CuTexture(texarr2D_2)
+    @test Array(fetch_all(tex2D_2)) == a2D
+
 
     tex2D_dir = CuTexture(CuTextureArray(a2D))
     @test Array(fetch_all(tex2D_dir)) == a2D
 
     texarr3D = CuTextureArray(a3D)
     tex3D = CuTexture(texarr3D)
     @test Array(fetch_all(tex3D)) == a3D
+    texarr3D_2 = CuTextureArray(texarr3D)
+    tex3D_2 = CuTexture(texarr3D_2)
+    @test Array(fetch_all(tex3D_2)) == a3D
 end
 
 @testset "CuTexture(::CuArray)" begin

diff --git a/test/core/cudadrv.jl b/test/core/cudadrv.jl
@@ -435,8 +435,15 @@ nb = sizeof(data)
 typed_pointer(buf::Union{CUDA.DeviceMemory, CUDA.UnifiedMemory}, T) = convert(CuPtr{T}, buf)
 typed_pointer(buf::CUDA.HostMemory, T)                              = convert(Ptr{T},   buf)
 
-# allocations and copies
-for srcTy in [CUDA.DeviceMemory, CUDA.HostMemory, CUDA.UnifiedMemory],
+@testset "showing" begin
+    for (Ty, str) in zip([CUDA.DeviceMemory, CUDA.HostMemory, CUDA.UnifiedMemory], ("DeviceMemory", "HostMemory", "UnifiedMemory"))
+        dummy = CUDA.alloc(Ty, 0)
+        @test startswith(sprint(show, dummy), str)
+        CUDA.free(dummy)
+    end
+end
+
+@testset "allocations and copies, src $srcTy dst $dstTy" for srcTy in [CUDA.DeviceMemory, CUDA.HostMemory, CUDA.UnifiedMemory],
     dstTy in [CUDA.DeviceMemory, CUDA.HostMemory, CUDA.UnifiedMemory]
 
     dummy = CUDA.alloc(srcTy, 0)
@@ -472,6 +479,7 @@ for srcTy in [CUDA.DeviceMemory, CUDA.HostMemory, CUDA.UnifiedMemory],
 
     # test device with context in which pointer was allocated.
     @test device(typed_pointer(src, T)) == device()
+    @test context(typed_pointer(src, T)) == context()
     if !memory_pools_supported(device())
         # NVIDIA bug #3319609
         @test context(typed_pointer(src, T)) == context()
@@ -495,17 +503,15 @@ for srcTy in [CUDA.DeviceMemory, CUDA.HostMemory, CUDA.UnifiedMemory],
     CUDA.free(dst)
 end
 
-# pointer attributes
-let
+@testset "pointer attributes" begin
     src = CUDA.alloc(CUDA.DeviceMemory, nb)
 
     attribute!(typed_pointer(src, T), CUDA.POINTER_ATTRIBUTE_SYNC_MEMOPS, 0)
 
     CUDA.free(src)
 end
 
-# asynchronous operations
-let
+@testset "asynchronous operations" begin
     src = CUDA.alloc(CUDA.DeviceMemory, nb)
 
     unsafe_copyto!(typed_pointer(src, T), pointer(data), N; async=true)
@@ -515,8 +521,7 @@ let
     CUDA.free(src)
 end
 
-# pinned memory
-let
+@testset "pinned memory" begin
     # create a pinned and mapped buffer
     src = CUDA.alloc(CUDA.HostMemory, nb, CUDA.MEMHOSTALLOC_DEVICEMAP)
 
@@ -547,10 +552,17 @@ if attribute(device(), CUDA.DEVICE_ATTRIBUTE_HOST_REGISTER_SUPPORTED) != 0
     @test ref == data
 
     CUDA.unregister(src)
+
+    # with a RefValue
+    src = Ref{T}(T(42))
+    CUDA.pin(src)
+    cpu_ptr = Base.unsafe_convert(Ptr{T}, src)
+    ref = Array{T}(undef, 1)
+    unsafe_copyto!(pointer(ref), cpu_ptr, 1)
+    @test ref == [T(42)]
 end
 
-# unified memory
-let
+@testset "unified memory" begin
     src = CUDA.alloc(CUDA.UnifiedMemory, nb)
 
     @test_throws BoundsError CUDA.prefetch(src, 2*nb; device=CUDA.DEVICE_CPU)
@@ -571,8 +583,7 @@ let
     CUDA.free(src)
 end
 
-# 3d memcpy
-let
+@testset "3d memcpy" begin
     # TODO: use cuMemAllocPitch (and put pitch in buffer?) to actually get benefit from this
 
     data = collect(reshape(1:27, 3, 3, 3))