From d3ec02dc1a3eefd7acc91247c41bce4f8eef1bc8 Mon Sep 17 00:00:00 2001 From: Eric Enns <492127+ericenns@users.noreply.github.com> Date: Wed, 22 Jan 2025 08:52:18 -0600 Subject: [PATCH 1/8] Add in copy_blob to client --- lib/azure_blob/client.rb | 18 +++++++++++++++++- test/client/test_client.rb | 11 +++++++++++ 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/lib/azure_blob/client.rb b/lib/azure_blob/client.rb index 21565d8..567bbef 100644 --- a/lib/azure_blob/client.rb +++ b/lib/azure_blob/client.rb @@ -77,6 +77,22 @@ def get_blob(key, options = {}) Http.new(uri, headers, signer:).get end + # Copy a blob + # + # Calls to {Copy Blob}[https://learn.microsoft.com/en-us/rest/api/storageservices/copy-blob] + # + # Takes a key (path) and a source_key (path). + # + def copy_blob(key, source_key) + uri = generate_uri("#{container}/#{key}") + + headers = { + "x-ms-copy-source": generate_uri("#{container}/#{source_key}").to_s, + } + + Http.new(uri, headers, signer:).put + end + # Delete a blob # # Calls to {Delete Blob}[https://learn.microsoft.com/en-us/rest/api/storageservices/delete-blob] @@ -202,7 +218,7 @@ def create_container(options = {}) uri = generate_uri(container) headers = {} headers[:"x-ms-blob-public-access"] = "blob" if options[:public_access] - headers[:"x-ms-blob-public-access"] = options[:public_access] if ["container","blob"].include?(options[:public_access]) + headers[:"x-ms-blob-public-access"] = options[:public_access] if [ "container", "blob" ].include?(options[:public_access]) uri.query = URI.encode_www_form(restype: "container") response = Http.new(uri, headers, signer:).put diff --git a/test/client/test_client.rb b/test/client/test_client.rb index 2d29877..44a820c 100644 --- a/test/client/test_client.rb +++ b/test/client/test_client.rb @@ -175,6 +175,17 @@ def test_download_404 assert_raises(AzureBlob::Http::FileNotFoundError) { client.get_blob(key) } end + def test_copy + client.create_block_blob(key, content) + assert_equal content, 
client.get_blob(key) + + copy_key = "#{key}_copy" + + client.copy_blob(copy_key, key) + + assert_equal content, client.get_blob(copy_key) + end + def test_delete client.create_block_blob(key, content) assert_equal content, client.get_blob(key) From 879c5f748e6e6e2c1cc6566bbf7a6c39b77acef8 Mon Sep 17 00:00:00 2001 From: Eric Enns <492127+ericenns@users.noreply.github.com> Date: Wed, 22 Jan 2025 09:02:55 -0600 Subject: [PATCH 2/8] Update compose method to use copy_blob operation when compose is only passed a single source key --- .../service/azure_blob_service.rb | 25 +++++++++++-------- lib/azure_blob/client.rb | 4 +-- test/rails/service/shared_service_tests.rb | 19 ++++++++++++++ 3 files changed, 36 insertions(+), 12 deletions(-) diff --git a/lib/active_storage/service/azure_blob_service.rb b/lib/active_storage/service/azure_blob_service.rb index a6836df..62ac146 100644 --- a/lib/active_storage/service/azure_blob_service.rb +++ b/lib/active_storage/service/azure_blob_service.rb @@ -123,16 +123,21 @@ def headers_for_direct_upload(key, content_type:, checksum:, filename: nil, disp def compose(source_keys, destination_key, filename: nil, content_type: nil, disposition: nil, custom_metadata: {}) content_disposition = content_disposition_with(type: disposition, filename: filename) if disposition && filename - client.create_append_blob( - destination_key, - content_type: content_type, - content_disposition: content_disposition, - metadata: custom_metadata, - ) - - source_keys.each do |source_key| - stream(source_key) do |chunk| - client.append_blob_block(destination_key, chunk) + # use copy_blob operation if composing a new blob from a single existing blob + if source_keys.length == 1 + client.copy_blob(destination_key, source_keys[0], metadata: custom_metadata) + else + client.create_append_blob( + destination_key, + content_type: content_type, + content_disposition: content_disposition, + metadata: custom_metadata, + ) + + source_keys.each do |source_key| + 
stream(source_key) do |chunk| + client.append_blob_block(destination_key, chunk) + end end end end diff --git a/lib/azure_blob/client.rb b/lib/azure_blob/client.rb index 567bbef..8eb20c8 100644 --- a/lib/azure_blob/client.rb +++ b/lib/azure_blob/client.rb @@ -83,14 +83,14 @@ def get_blob(key, options = {}) # # Takes a key (path) and a source_key (path). # - def copy_blob(key, source_key) + def copy_blob(key, source_key, options = {}) uri = generate_uri("#{container}/#{key}") headers = { "x-ms-copy-source": generate_uri("#{container}/#{source_key}").to_s, } - Http.new(uri, headers, signer:).put + Http.new(uri, headers, signer:, **options.slice(:metadata, :tags)).put end # Delete a blob diff --git a/test/rails/service/shared_service_tests.rb b/test/rails/service/shared_service_tests.rb index fbe2d2d..8fe3c6c 100644 --- a/test/rails/service/shared_service_tests.rb +++ b/test/rails/service/shared_service_tests.rb @@ -158,5 +158,24 @@ module ActiveStorage::Service::SharedServiceTests assert_equal "Together", @service.download(destination_key) end + + test "compose from single blob" do + keys = [ SecureRandom.base58(24) ] + data = %w[Together] + keys.zip(data).each do |key, data| + @service.upload( + key, + StringIO.new(data), + checksum: Digest::MD5.base64digest(data), + disposition: :attachment, + filename: ActiveStorage::Filename.new("test.html"), + content_type: "text/html", + ) + end + destination_key = SecureRandom.base58(24) + @service.compose(keys, destination_key) + + assert_equal "Together", @service.download(destination_key) + end end end From 6b9d5d55954d8818a4822c40927148bd6f9e12fe Mon Sep 17 00:00:00 2001 From: Eric Enns <492127+ericenns@users.noreply.github.com> Date: Wed, 22 Jan 2025 14:55:55 -0600 Subject: [PATCH 3/8] Switch to synchronous copy method which has 256MiB limit --- lib/active_storage/service/azure_blob_service.rb | 3 ++- lib/azure_blob/client.rb | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git 
a/lib/active_storage/service/azure_blob_service.rb b/lib/active_storage/service/azure_blob_service.rb index 62ac146..f4ea673 100644 --- a/lib/active_storage/service/azure_blob_service.rb +++ b/lib/active_storage/service/azure_blob_service.rb @@ -124,7 +124,8 @@ def compose(source_keys, destination_key, filename: nil, content_type: nil, disp content_disposition = content_disposition_with(type: disposition, filename: filename) if disposition && filename # use copy_blob operation if composing a new blob from a single existing blob - if source_keys.length == 1 + # and that single blob is <= 256 MiB which is the upper limit for copy_blob operation + if source_keys.length == 1 && client.get_blob_properties(source_keys[0]).size <= 256.megabytes client.copy_blob(destination_key, source_keys[0], metadata: custom_metadata) else client.create_append_blob( diff --git a/lib/azure_blob/client.rb b/lib/azure_blob/client.rb index 8eb20c8..e3d00d3 100644 --- a/lib/azure_blob/client.rb +++ b/lib/azure_blob/client.rb @@ -79,7 +79,7 @@ def get_blob(key, options = {}) # Copy a blob # - # Calls to {Copy Blob}[https://learn.microsoft.com/en-us/rest/api/storageservices/copy-blob] + # Calls to {Copy Blob From URL}[https://learn.microsoft.com/en-us/rest/api/storageservices/copy-blob-from-url] # # Takes a key (path) and a source_key (path). 
# @@ -88,6 +88,7 @@ def copy_blob(key, source_key, options = {}) headers = { "x-ms-copy-source": generate_uri("#{container}/#{source_key}").to_s, + "x-ms-requires-sync": "true", } Http.new(uri, headers, signer:, **options.slice(:metadata, :tags)).put From 358fe2cbffde22add0b9af18ccfe2b722cf3f67c Mon Sep 17 00:00:00 2001 From: Eric Enns <492127+ericenns@users.noreply.github.com> Date: Thu, 23 Jan 2025 14:53:15 -0600 Subject: [PATCH 4/8] chore: add in test for compose method --- test/rails/service/azure_blob_service_test.rb | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/test/rails/service/azure_blob_service_test.rb b/test/rails/service/azure_blob_service_test.rb index 3e5b6c5..a7a69ac 100644 --- a/test/rails/service/azure_blob_service_test.rb +++ b/test/rails/service/azure_blob_service_test.rb @@ -116,4 +116,25 @@ class ActiveStorage::Service::AzureBlobServiceTest < ActiveSupport::TestCase ensure @service.delete(key) end + + test "composing a blob from one source blob" do + key = SecureRandom.base58(24) + data = "Something else entirely!" 
+ + Tempfile.open do |file| + file.write(data) + file.rewind + @service.upload(key, file) + end + + assert_equal data, @service.download(key) + + copy_key = SecureRandom.base58(24) + @service.compose([ key ], copy_key) + + assert_equal data, @service.download(copy_key) + ensure + @service.delete key + @service.delete copy_key + end end From 1b4c6b15e4753c81f70d8e834f856de401002f8a Mon Sep 17 00:00:00 2001 From: Eric Enns <492127+ericenns@users.noreply.github.com> Date: Thu, 23 Jan 2025 16:18:13 -0600 Subject: [PATCH 5/8] chore: switch to Put blob from url method --- lib/active_storage/service/azure_blob_service.rb | 4 ++-- lib/azure_blob/client.rb | 5 +++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/lib/active_storage/service/azure_blob_service.rb b/lib/active_storage/service/azure_blob_service.rb index f4ea673..1efa776 100644 --- a/lib/active_storage/service/azure_blob_service.rb +++ b/lib/active_storage/service/azure_blob_service.rb @@ -124,8 +124,8 @@ def compose(source_keys, destination_key, filename: nil, content_type: nil, disp content_disposition = content_disposition_with(type: disposition, filename: filename) if disposition && filename # use copy_blob operation if composing a new blob from a single existing blob - # and that single blob is <= 256 MiB which is the upper limit for copy_blob operation - if source_keys.length == 1 && client.get_blob_properties(source_keys[0]).size <= 256.megabytes + # and that single blob is <= 5000 MiB which is the upper limit for copy_blob operation + if source_keys.length == 1 && client.get_blob_properties(source_keys[0]).size <= 5000.megabytes client.copy_blob(destination_key, source_keys[0], metadata: custom_metadata) else client.create_append_blob( diff --git a/lib/azure_blob/client.rb b/lib/azure_blob/client.rb index e3d00d3..27a6cd4 100644 --- a/lib/azure_blob/client.rb +++ b/lib/azure_blob/client.rb @@ -79,7 +79,7 @@ def get_blob(key, options = {}) # Copy a blob # - # Calls to {Copy Blob From 
URL}[https://learn.microsoft.com/en-us/rest/api/storageservices/copy-blob-from-url] + # Calls to {Put Blob From URL}[https://learn.microsoft.com/en-us/rest/api/storageservices/put-blob-from-url] # # Takes a key (path) and a source_key (path). # @@ -87,8 +87,9 @@ def copy_blob(key, source_key, options = {}) uri = generate_uri("#{container}/#{key}") headers = { + "Content-Length": "0", "x-ms-copy-source": generate_uri("#{container}/#{source_key}").to_s, - "x-ms-requires-sync": "true", + "x-ms-blob-type": "BlockBlob", } Http.new(uri, headers, signer:, **options.slice(:metadata, :tags)).put From 9d6b5d5576f9a3b3b30c9d81db6213346ed446ee Mon Sep 17 00:00:00 2001 From: Eric Enns <492127+ericenns@users.noreply.github.com> Date: Thu, 23 Jan 2025 16:40:02 -0600 Subject: [PATCH 6/8] chore: fix issue Content-Length header for copy blob operation --- lib/azure_blob/client.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/azure_blob/client.rb b/lib/azure_blob/client.rb index 27a6cd4..84208bb 100644 --- a/lib/azure_blob/client.rb +++ b/lib/azure_blob/client.rb @@ -87,7 +87,7 @@ def copy_blob(key, source_key, options = {}) uri = generate_uri("#{container}/#{key}") headers = { - "Content-Length": "0", + "Content-Length": 0, "x-ms-copy-source": generate_uri("#{container}/#{source_key}").to_s, "x-ms-blob-type": "BlockBlob", } From 9e1bbbeddec20f0df263063b996b51b2aaf81990 Mon Sep 17 00:00:00 2001 From: Eric Enns <492127+ericenns@users.noreply.github.com> Date: Fri, 24 Jan 2025 08:22:58 -0600 Subject: [PATCH 7/8] chore: sign copy source uri --- lib/azure_blob/client.rb | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/azure_blob/client.rb b/lib/azure_blob/client.rb index 84208bb..11c139f 100644 --- a/lib/azure_blob/client.rb +++ b/lib/azure_blob/client.rb @@ -86,9 +86,11 @@ def get_blob(key, options = {}) def copy_blob(key, source_key, options = {}) uri = generate_uri("#{container}/#{key}") + source_uri = signed_uri(source_key, permissions: 
"r", expiry: Time.at(Time.now.to_i + 300).utc.iso8601) + headers = { "Content-Length": 0, - "x-ms-copy-source": generate_uri("#{container}/#{source_key}").to_s, + "x-ms-copy-source": source_uri.to_s, "x-ms-blob-type": "BlockBlob", } From 674fe68e217e74e7270b78b477ca224ea541ce4b Mon Sep 17 00:00:00 2001 From: Eric Enns <492127+ericenns@users.noreply.github.com> Date: Thu, 6 Feb 2025 14:35:53 -0600 Subject: [PATCH 8/8] chore: revert to using copy blob from url option and update changelog --- CHANGELOG.md | 3 +++ lib/active_storage/service/azure_blob_service.rb | 4 ++-- lib/azure_blob/client.rb | 5 ++--- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e3a578c..20b267a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,8 @@ ## [Unreleased] +- Add `copy_blob` +- Update `compose` to use `copy_blob` if 1 source key and blob is <= 256MiB + ## [0.5.6] 2025-01-17 - Fix user delegation key not refreshing (#14) diff --git a/lib/active_storage/service/azure_blob_service.rb b/lib/active_storage/service/azure_blob_service.rb index 1efa776..f4ea673 100644 --- a/lib/active_storage/service/azure_blob_service.rb +++ b/lib/active_storage/service/azure_blob_service.rb @@ -124,8 +124,8 @@ def compose(source_keys, destination_key, filename: nil, content_type: nil, disp content_disposition = content_disposition_with(type: disposition, filename: filename) if disposition && filename # use copy_blob operation if composing a new blob from a single existing blob - # and that single blob is <= 5000 MiB which is the upper limit for copy_blob operation - if source_keys.length == 1 && client.get_blob_properties(source_keys[0]).size <= 5000.megabytes + # and that single blob is <= 256 MiB which is the upper limit for copy_blob operation + if source_keys.length == 1 && client.get_blob_properties(source_keys[0]).size <= 256.megabytes client.copy_blob(destination_key, source_keys[0], metadata: custom_metadata) else client.create_append_blob( diff 
--git a/lib/azure_blob/client.rb b/lib/azure_blob/client.rb index 11c139f..349837c 100644 --- a/lib/azure_blob/client.rb +++ b/lib/azure_blob/client.rb @@ -79,7 +79,7 @@ def get_blob(key, options = {}) # Copy a blob # - # Calls to {Put Blob From URL}[https://learn.microsoft.com/en-us/rest/api/storageservices/put-blob-from-url] + # Calls to {Copy Blob From URL}[https://learn.microsoft.com/en-us/rest/api/storageservices/copy-blob-from-url] # # Takes a key (path) and a source_key (path). # @@ -89,9 +89,8 @@ def copy_blob(key, source_key, options = {}) source_uri = signed_uri(source_key, permissions: "r", expiry: Time.at(Time.now.to_i + 300).utc.iso8601) headers = { - "Content-Length": 0, "x-ms-copy-source": source_uri.to_s, - "x-ms-blob-type": "BlockBlob", + "x-ms-requires-sync": "true", } Http.new(uri, headers, signer:, **options.slice(:metadata, :tags)).put