Skip to content

Commit

Permalink
fix(thumbnail cache): Enabling force parameter on screenshot/thumbnai…
Browse files Browse the repository at this point in the history
…l cache (apache#31757)

Co-authored-by: Kamil Gabryjelski <[email protected]>
  • Loading branch information
fisjac and kgabryje authored Jan 31, 2025
1 parent c590e90 commit 7db0589
Show file tree
Hide file tree
Showing 13 changed files with 605 additions and 276 deletions.
111 changes: 66 additions & 45 deletions superset/charts/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
from werkzeug.wrappers import Response as WerkzeugResponse
from werkzeug.wsgi import FileWrapper

from superset import app, is_feature_enabled, thumbnail_cache
from superset import app, is_feature_enabled
from superset.charts.filters import (
ChartAllTextFilter,
ChartCertifiedFilter,
Expand Down Expand Up @@ -84,7 +84,12 @@
from superset.tasks.thumbnails import cache_chart_thumbnail
from superset.tasks.utils import get_current_user
from superset.utils import json
from superset.utils.screenshots import ChartScreenshot, DEFAULT_CHART_WINDOW_SIZE
from superset.utils.screenshots import (
ChartScreenshot,
DEFAULT_CHART_WINDOW_SIZE,
ScreenshotCachePayload,
StatusValues,
)
from superset.utils.urls import get_url_path
from superset.views.base_api import (
BaseSupersetModelRestApi,
Expand Down Expand Up @@ -564,8 +569,14 @@ def cache_screenshot(self, pk: int, **kwargs: Any) -> WerkzeugResponse:
schema:
$ref: '#/components/schemas/screenshot_query_schema'
responses:
200:
description: Chart async result
content:
application/json:
schema:
$ref: "#/components/schemas/ChartCacheScreenshotResponseSchema"
202:
description: Chart async result
description: Chart screenshot task created
content:
application/json:
schema:
Expand All @@ -580,6 +591,7 @@ def cache_screenshot(self, pk: int, **kwargs: Any) -> WerkzeugResponse:
$ref: '#/components/responses/500'
"""
rison_dict = kwargs["rison"]
force = rison_dict.get("force")
window_size = rison_dict.get("window_size") or DEFAULT_CHART_WINDOW_SIZE

# Don't shrink the image if thumb_size is not specified
Expand All @@ -591,25 +603,36 @@ def cache_screenshot(self, pk: int, **kwargs: Any) -> WerkzeugResponse:

chart_url = get_url_path("Superset.slice", slice_id=chart.id)
screenshot_obj = ChartScreenshot(chart_url, chart.digest)
cache_key = screenshot_obj.cache_key(window_size, thumb_size)
cache_key = screenshot_obj.get_cache_key(window_size, thumb_size)
cache_payload = (
screenshot_obj.get_from_cache_key(cache_key) or ScreenshotCachePayload()
)
image_url = get_url_path(
"ChartRestApi.screenshot", pk=chart.id, digest=cache_key
)

def trigger_celery() -> WerkzeugResponse:
def build_response(status_code: int) -> WerkzeugResponse:
return self.response(
status_code,
cache_key=cache_key,
chart_url=chart_url,
image_url=image_url,
task_updated_at=cache_payload.get_timestamp(),
task_status=cache_payload.get_status(),
)

if cache_payload.should_trigger_task(force):
logger.info("Triggering screenshot ASYNC")
screenshot_obj.cache.set(cache_key, ScreenshotCachePayload())
cache_chart_thumbnail.delay(
current_user=get_current_user(),
chart_id=chart.id,
force=True,
window_size=window_size,
thumb_size=thumb_size,
force=force,
)
return self.response(
202, cache_key=cache_key, chart_url=chart_url, image_url=image_url
)

return trigger_celery()
return build_response(202)
return build_response(200)

@expose("/<pk>/screenshot/<digest>/", methods=("GET",))
@protect()
Expand All @@ -635,7 +658,7 @@ def screenshot(self, pk: int, digest: str) -> WerkzeugResponse:
name: digest
responses:
200:
description: Chart thumbnail image
description: Chart screenshot image
content:
image/*:
schema:
Expand All @@ -652,16 +675,16 @@ def screenshot(self, pk: int, digest: str) -> WerkzeugResponse:
"""
chart = self.datamodel.get(pk, self._base_filters)

# Making sure the chart still exists
if not chart:
return self.response_404()

# fetch the chart screenshot using the current user and cache if set
if img := ChartScreenshot.get_from_cache_key(thumbnail_cache, digest):
return Response(
FileWrapper(img), mimetype="image/png", direct_passthrough=True
)
# TODO: return an empty image
if cache_payload := ChartScreenshot.get_from_cache_key(digest):
if cache_payload.status == StatusValues.UPDATED:
return Response(
FileWrapper(cache_payload.get_image()),
mimetype="image/png",
direct_passthrough=True,
)
return self.response_404()

@expose("/<pk>/thumbnail/<digest>/", methods=("GET",))
Expand All @@ -685,9 +708,10 @@ def thumbnail(self, pk: int, digest: str, **kwargs: Any) -> WerkzeugResponse:
type: integer
name: pk
- in: path
name: digest
description: A hex digest that makes this chart unique
schema:
type: string
name: digest
responses:
200:
description: Chart thumbnail image
Expand All @@ -712,44 +736,41 @@ def thumbnail(self, pk: int, digest: str, **kwargs: Any) -> WerkzeugResponse:
return self.response_404()

current_user = get_current_user()
url = get_url_path("Superset.slice", slice_id=chart.id)
if kwargs["rison"].get("force", False):
logger.info(
"Triggering thumbnail compute (chart id: %s) ASYNC", str(chart.id)
)
cache_chart_thumbnail.delay(
current_user=current_user,
chart_id=chart.id,
force=True,
if chart.digest != digest:
self.incr_stats("redirect", self.thumbnail.__name__)
return redirect(
url_for(
f"{self.__class__.__name__}.thumbnail", pk=pk, digest=chart.digest
)
)
return self.response(202, message="OK Async")
# fetch the chart screenshot using the current user and cache if set
screenshot = ChartScreenshot(url, chart.digest).get_from_cache(
cache=thumbnail_cache
url = get_url_path("Superset.slice", slice_id=chart.id)
screenshot_obj = ChartScreenshot(url, chart.digest)
cache_key = screenshot_obj.get_cache_key()
cache_payload = (
screenshot_obj.get_from_cache_key(cache_key) or ScreenshotCachePayload()
)
# If not screenshot then send request to compute thumb to celery
if not screenshot:

if cache_payload.should_trigger_task():
self.incr_stats("async", self.thumbnail.__name__)
logger.info(
"Triggering thumbnail compute (chart id: %s) ASYNC", str(chart.id)
)
screenshot_obj.cache.set(cache_key, ScreenshotCachePayload())
cache_chart_thumbnail.delay(
current_user=current_user,
chart_id=chart.id,
force=True,
force=False,
)
return self.response(202, message="OK Async")
# If digests
if chart.digest != digest:
self.incr_stats("redirect", self.thumbnail.__name__)
return redirect(
url_for(
f"{self.__class__.__name__}.thumbnail", pk=pk, digest=chart.digest
)
return self.response(
202,
task_updated_at=cache_payload.get_timestamp(),
task_status=cache_payload.get_status(),
)
self.incr_stats("from_cache", self.thumbnail.__name__)
return Response(
FileWrapper(screenshot), mimetype="image/png", direct_passthrough=True
FileWrapper(cache_payload.get_image()),
mimetype="image/png",
direct_passthrough=True,
)

@expose("/export/", methods=("GET",))
Expand Down
15 changes: 15 additions & 0 deletions superset/charts/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,21 @@ class ChartCacheScreenshotResponseSchema(Schema):
image_url = fields.String(
metadata={"description": "The url to fetch the screenshot"}
)
task_status = fields.String(
metadata={"description": "The status of the async screenshot"}
)
task_updated_at = fields.String(
metadata={"description": "The timestamp of the last change in status"}
)


class ChartGetCachedScreenshotResponseSchema(Schema):
task_status = fields.String(
metadata={"description": "The status of the async screenshot"}
)
task_updated_at = fields.String(
metadata={"description": "The timestamp of the last change in status"}
)


class ChartDataColumnSchema(Schema):
Expand Down
2 changes: 2 additions & 0 deletions superset/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -729,8 +729,10 @@ class D3TimeFormat(TypedDict, total=False):

THUMBNAIL_CACHE_CONFIG: CacheConfig = {
"CACHE_TYPE": "NullCache",
"CACHE_DEFAULT_TIMEOUT": int(timedelta(days=7).total_seconds()),
"CACHE_NO_NULL_WARNING": True,
}
THUMBNAIL_ERROR_CACHE_TTL = int(timedelta(days=1).total_seconds())

# Time before selenium times out after trying to locate an element on the page and wait
# for that element to load for a screenshot.
Expand Down
Loading

0 comments on commit 7db0589

Please sign in to comment.