Skip to content

Commit

Permalink
Download: Fix crash when an HTML page links to the same embedded URL …
Browse files Browse the repository at this point in the history
…multiple times

...if a differing fragment component is used in the URL

Helps download & serve: https://minimalistbaker.com/
  • Loading branch information
davidfstr committed Dec 13, 2023
1 parent f6b0f19 commit 2546b3d
Show file tree
Hide file tree
Showing 6 changed files with 65 additions and 15 deletions.
2 changes: 1 addition & 1 deletion src/crystal/browser/tasktree.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ class TaskTreeNode:
def __init__(self, task: Task) -> None:
self.task = task

if Task._USE_EXTRA_LISTENER_ASSERTIONS: # type: ignore[truthy-function] # @cached_property
if Task._USE_EXTRA_LISTENER_ASSERTIONS:
assert self not in self.task.listeners
if not task.complete:
self.task.listeners.append(self)
Expand Down
1 change: 0 additions & 1 deletion src/crystal/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,6 @@
)
import cgi
import datetime
from functools import cached_property
import json
import math
import mimetypes
Expand Down
25 changes: 13 additions & 12 deletions src/crystal/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
warn_if_slow,
)
from crystal.util.progress import ProgressBarCalculator
from crystal.util.xcollections.dedup import dedup_list
from crystal.util.xcollections.lazy import AppendableLazySequence
from crystal.util.xfutures import Future
from crystal.util.xgc import gc_disabled
Expand All @@ -17,7 +18,6 @@
bg_affinity, bg_call_later, fg_affinity, fg_call_and_wait, fg_call_later,
is_foreground_thread, NoForegroundThreadError
)
from functools import cached_property
import os
from overrides import overrides
import shutil
Expand Down Expand Up @@ -99,16 +99,14 @@ class Task(ListenableMixin):
Tasks are not allowed to be complete immediately after initialization
unless explicitly documented in the Task class's docstring.
"""
@cached_property
def _USE_EXTRA_LISTENER_ASSERTIONS(self) -> bool:
return os.environ.get('CRYSTAL_RUNNING_TESTS', 'False') == 'True'

# Abstract fields for subclasses to override
icon_name = None # type: Optional[str] # abstract
"""The name of the icon resource used for this task, or None to use the default icon."""
scheduling_style = SCHEDULING_STYLE_NONE # abstract for container task types
"""For a container task, defines the order that task units from children will be executed in."""

_USE_EXTRA_LISTENER_ASSERTIONS = \
os.environ.get('CRYSTAL_RUNNING_TESTS', 'False') == 'True'
_REPORTED_TASKS_WITH_MANY_LISTENERS = WeakSet() # type: WeakSet[Task]

# Optimize per-instance memory use, since there may be very many Task objects
Expand Down Expand Up @@ -268,13 +266,16 @@ def materialize_child(self, child: Task, *, already_complete_ok: bool=False) ->
f'and already_completed_ok is False. '
f'self={self}, child={child}')

if Task._USE_EXTRA_LISTENER_ASSERTIONS: # type: ignore[truthy-function] # @cached_property
if Task._USE_EXTRA_LISTENER_ASSERTIONS:
assert child in self._children
# NOTE: child._parent may already be set to a different parent
child._parent = self

if Task._USE_EXTRA_LISTENER_ASSERTIONS: # type: ignore[truthy-function] # @cached_property
assert self not in child.listeners
if Task._USE_EXTRA_LISTENER_ASSERTIONS:
assert self not in child.listeners, (
f'Expected {self=} to not already be listening to {child=}. '
f'Was child added multiple times to the same parent?'
)
if len(child.listeners) >= 50:
if child not in Task._REPORTED_TASKS_WITH_MANY_LISTENERS:
Task._REPORTED_TASKS_WITH_MANY_LISTENERS.add(child)
Expand Down Expand Up @@ -362,7 +363,7 @@ def clear_children_if_all_complete(self) -> bool:
if all_children_complete:
for child in self._children:
child._parent = None
if Task._USE_EXTRA_LISTENER_ASSERTIONS: # type: ignore[truthy-function] # @cached_property
if Task._USE_EXTRA_LISTENER_ASSERTIONS:
assert self not in child.listeners
self._children = []
self._num_children_complete = 0
Expand Down Expand Up @@ -897,7 +898,7 @@ def fg_task() -> List[Resource]:
# an alias of itself
pass
else:
for resource in embedded_resources:
for resource in dedup_list(embedded_resources):
if resource in ancestor_downloading_resources:
# Avoid infinite recursion when resource identifies itself
# (probably incorrectly) as an embedded resource of itself,
Expand Down Expand Up @@ -1221,7 +1222,7 @@ def __init__(self, group: ResourceGroup) -> None:
title='Downloading members of group: %s' % group.name)
self.group = group

if Task._USE_EXTRA_LISTENER_ASSERTIONS: # type: ignore[truthy-function] # @cached_property
if Task._USE_EXTRA_LISTENER_ASSERTIONS:
assert self not in self.group.listeners
self.group.listeners.append(self)

Expand Down Expand Up @@ -1335,7 +1336,7 @@ def _update_completed_status(self):

def finish(self) -> None:
self.group.listeners.remove(self)
if Task._USE_EXTRA_LISTENER_ASSERTIONS: # type: ignore[truthy-function] # @cached_property
if Task._USE_EXTRA_LISTENER_ASSERTIONS:
assert self not in self.group.listeners

if self._pbc is not None:
Expand Down
34 changes: 34 additions & 0 deletions src/crystal/tests/test_download.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ async def test_does_not_download_embedded_resources_of_recognized_binary_resourc

assert ['/'] == server.requested_paths


async def test_does_not_download_forever_given_embedded_resources_form_a_cycle() -> None:
server = MockHttpServer({
'/': dict(
Expand Down Expand Up @@ -325,6 +326,39 @@ async def test_when_download_resource_given_all_embedded_resources_already_downl
assert ['/index.php'] == server.requested_paths


async def test_given_same_resource_embedded_multiple_times_then_downloads_it_only_once() -> None:
server = MockHttpServer({
'/': dict(
status_code=200,
headers=[('Content-Type', 'text/html')],
content=dedent(
"""
<!DOCTYPE html>
<html>
<body>
<img src="/assets/image.png" />
<img src="/assets/image.png" />
<img src="/assets/image.png#fragment-should-be-ignored" />
</body>
</html>
"""
).lstrip('\n').encode('utf-8')
)
})
with server:
with tempfile.TemporaryDirectory(suffix='.crystalproj') as project_dirpath:
async with (await OpenOrCreateDialog.wait_for()).create(project_dirpath) as (mw, _):
project = Project._last_opened_project
assert project is not None

r = Resource(project, server.get_url('/'))
revision_future = r.download(wait_for_embedded=True)
while not revision_future.done():
await bg_sleep(DEFAULT_WAIT_PERIOD)

assert ['/', '/assets/image.png'] == server.requested_paths


# ------------------------------------------------------------------------------
# DownloadResourceGroupTask Tests

Expand Down
6 changes: 5 additions & 1 deletion src/crystal/tests/test_tasks.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from crystal.task import (
ASSUME_RESOURCES_DOWNLOADED_IN_SESSION_WILL_ALWAYS_REMAIN_FRESH,
ProjectFreeSpaceTooLowError
ProjectFreeSpaceTooLowError, Task
)
from crystal.tests.util.asserts import *
from crystal.tests.util.data import (
Expand Down Expand Up @@ -135,6 +135,10 @@ async def test_some_tasks_may_complete_immediately(subtests) -> None:
)


async def test_given_running_tests_then_uses_extra_listener_assertions() -> None:
assert True == Task._USE_EXTRA_LISTENER_ASSERTIONS


# ------------------------------------------------------------------------------
# Test: DownloadResourceTask
# (+ DownloadResourceBodyTask)
Expand Down
12 changes: 12 additions & 0 deletions src/crystal/util/xcollections/dedup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
from typing import List, TypeVar


_E = TypeVar('_E')


def dedup_list(xs: List[_E]) -> List[_E]:
"""
Removes duplicates from the specified list,
preserving the original order of elements.
"""
return list(dict.fromkeys(xs))

0 comments on commit 2546b3d

Please sign in to comment.