From 09194c61cae7617705d661170fe60d8220d4351e Mon Sep 17 00:00:00 2001 From: Christian Sutter Date: Wed, 29 Nov 2023 15:33:26 +0000 Subject: [PATCH] Remove body content size limit Turns out this was a metadata size limit restriction, which explains why we still saw the error after truncating the content. Now that we have removed additional searchable text from the metadata, we should be able to safely remove any limits anyway. --- app/models/concerns/publishing_api/content.rb | 7 ------- .../models/concerns/publishing_api/content_spec.rb | 14 -------------- 2 files changed, 21 deletions(-) diff --git a/app/models/concerns/publishing_api/content.rb b/app/models/concerns/publishing_api/content.rb index 2bc6210..81db2ae 100644 --- a/app/models/concerns/publishing_api/content.rb +++ b/app/models/concerns/publishing_api/content.rb @@ -45,12 +45,6 @@ module Content ].map { JsonPath.new(_1, use_symbols: true) }.freeze INDEXABLE_CONTENT_SEPARATOR = "\n".freeze - # The limit of content length on Discovery Engine API is currently 500KB, so we need to truncate - # the content to a reasonable size. - # - # TODO: Try and get limit increased? - INDEXABLE_CONTENT_MAX_BYTE_SIZE = 480.kilobytes - # Extracts a single string of indexable unstructured content from the document. def content values_from_json_paths = INDEXABLE_CONTENT_VALUES_JSON_PATHS.map do |item| @@ -66,7 +60,6 @@ def content .flatten .compact_blank .join(INDEXABLE_CONTENT_SEPARATOR) - .truncate_bytes(INDEXABLE_CONTENT_MAX_BYTE_SIZE) end end end diff --git a/spec/models/concerns/publishing_api/content_spec.rb b/spec/models/concerns/publishing_api/content_spec.rb index cc681a4..694e650 100644 --- a/spec/models/concerns/publishing_api/content_spec.rb +++ b/spec/models/concerns/publishing_api/content_spec.rb @@ -72,20 +72,6 @@ it { is_expected.to eq("

Foo

\nbar\n

Bar

\nbaz") } end - describe "with excessively large content" do - let(:document_hash) do - { - details: { - body: "a" * 600.kilobytes, - }, - } - end - - it "truncates the content" do - expect(extracted_content.bytesize).to be <= 500.kilobytes - end - end - describe "without any fields" do let(:document_hash) do {