Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
44 commits
Select commit Hold shift + click to select a range
cf13097
Added show_ground_truth_always field in project and handle next_task
mcanu Nov 27, 2025
7148a06
Modified migration
mcanu Nov 27, 2025
11b1697
Fix migration default
mcanu Nov 27, 2025
cf65c8f
Fixed migration
mcanu Nov 28, 2025
736ddde
Sync Follow Merge dependencies
nass600 Dec 4, 2025
d17a17a
Merge branch 'develop' into 'fb-utc-416'
nass600 Dec 4, 2025
655fe5d
added warning badge
nass600 Dec 4, 2025
a3fa9b0
added icons
nass600 Dec 4, 2025
d8dc179
Added new fields
mcanu Dec 4, 2025
5d38ddf
Merge branch 'develop' into 'fb-utc-416'
matt-bernstein Dec 5, 2025
a995df7
Sync Follow Merge dependencies
robot-ci-heartex Dec 5, 2025
fc8aabc
Sync Follow Merge dependencies
robot-ci-heartex Dec 5, 2025
cdc89b6
Using new fields
mcanu Dec 5, 2025
2ae8516
Sync Follow Merge dependencies
robot-ci-heartex Dec 5, 2025
9788e19
Sync Follow Merge dependencies
robot-ci-heartex Dec 5, 2025
764e408
Sync Follow Merge dependencies
mcanu Dec 8, 2025
f8a19ab
Merge branch 'develop' into 'fb-utc-416'
mcanu Dec 8, 2025
2c0d2cf
Deprecated show_ground_truth_first
mcanu Dec 8, 2025
ebce42e
Sync Follow Merge dependencies
robot-ci-heartex Dec 8, 2025
d4b1713
Merge branch 'develop' into 'fb-utc-416'
robot-ci-heartex Dec 8, 2025
da019ac
Sync Follow Merge dependencies
robot-ci-heartex Dec 8, 2025
7af4a7e
Fix test
mcanu Dec 8, 2025
3e5ee9a
Sync Follow Merge dependencies
mcanu Dec 8, 2025
12275ef
Sync Follow Merge dependencies
robot-ci-heartex Dec 8, 2025
256b4b0
Sync Follow Merge dependencies
mcanu Dec 9, 2025
6e80b9c
Merge branch 'develop' into 'fb-utc-416'
mcanu Dec 9, 2025
719e00e
annotator_evaluation_enabled default to False
mcanu Dec 9, 2025
efda0ee
Fix columns test
mcanu Dec 9, 2025
5ce366e
Sync Follow Merge dependencies
mcanu Dec 10, 2025
2990a35
Merge branch 'develop' into 'fb-utc-416'
mcanu Dec 10, 2025
31f3007
Sync Follow Merge dependencies
robot-ci-heartex Dec 10, 2025
798c8d0
Sync Follow Merge dependencies
robot-ci-heartex Dec 10, 2025
367c9b1
Merge branch 'develop' into 'fb-utc-416'
mcanu Dec 11, 2025
254b3af
Sync Follow Merge dependencies
robot-ci-heartex Dec 11, 2025
a1a7741
Merge branch 'develop' into 'fb-utc-416'
robot-ci-heartex Dec 11, 2025
86598f5
Sync Follow Merge dependencies
robot-ci-heartex Dec 11, 2025
b94d723
Sync Follow Merge dependencies
mcanu Dec 12, 2025
b4c1f2d
Merge branch 'develop' into 'fb-utc-416'
mcanu Dec 12, 2025
862d4b4
Sync Follow Merge dependencies
mcanu Dec 15, 2025
c7d1448
Merge branch 'develop' into 'fb-utc-416'
mcanu Dec 15, 2025
ab8cc53
Merge branch 'develop' into 'fb-utc-416'
mcanu Dec 15, 2025
9cee6e3
Sync Follow Merge dependencies
robot-ci-heartex Dec 15, 2025
9b919d8
Merge branch 'develop' into 'fb-utc-416'
mcanu Dec 15, 2025
ad9e951
Sync Follow Merge dependencies
robot-ci-heartex Dec 16, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 0 additions & 7 deletions docs/source/guide/webhook_reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,6 @@ The webhook payload includes the name of the action and some additional task dat
"created_at": "2021-08-17T13:49:34.326416Z",
"updated_at": "2021-08-17T13:49:35.911271Z",
"sampling": "Sequential sampling",
"show_ground_truth_first": true,
"show_overlap_first": true,
"overlap_cohort_percentage": 100,
"task_data_login": null,
Expand Down Expand Up @@ -219,7 +218,6 @@ Sent when a task is deleted from Label Studio. See how to [set up a webhook for
"created_at": "2021-08-17T13:49:34.326416Z",
"updated_at": "2021-08-17T13:52:09.334425Z",
"sampling": "Sequential sampling",
"show_ground_truth_first": true,
"show_overlap_first": true,
"overlap_cohort_percentage": 100,
"task_data_login": null,
Expand Down Expand Up @@ -328,7 +326,6 @@ The webhook payload includes the name of the action and some additional annotati
"created_at": "2021-08-17T13:49:34.326416Z",
"updated_at": "2021-08-17T13:52:09.334425Z",
"sampling": "Sequential sampling",
"show_ground_truth_first": true,
"show_overlap_first": true,
"overlap_cohort_percentage": 100,
"task_data_login": null,
Expand Down Expand Up @@ -463,7 +460,6 @@ The webhook payload includes the name of the action and some additional annotati
"created_at": "2021-08-12T14:15:01.744507Z",
"updated_at": "2021-08-17T13:35:25.697471Z",
"sampling": "Sequential sampling",
"show_ground_truth_first": true,
"show_overlap_first": true,
"overlap_cohort_percentage": 100,
"task_data_login": null,
Expand Down Expand Up @@ -538,7 +534,6 @@ Sent when an annotation is deleted. See how to [set up a webhook for this event]
"created_at": "2021-08-17T13:49:34.326416Z",
"updated_at": "2021-08-17T13:52:09.334425Z",
"sampling": "Sequential sampling",
"show_ground_truth_first": true,
"show_overlap_first": true,
"overlap_cohort_percentage": 100,
"task_data_login": null,
Expand Down Expand Up @@ -603,7 +598,6 @@ The webhook payload includes the name of the action and some additional project
"created_at": "2021-08-17T13:55:58.809065Z",
"updated_at": "2021-08-17T13:55:58.809098Z",
"sampling": "Sequential sampling",
"show_ground_truth_first": true,
"show_overlap_first": true,
"overlap_cohort_percentage": 100,
"task_data_login": null,
Expand Down Expand Up @@ -674,7 +668,6 @@ The webhook payload includes the name of the action and some additional project
"created_at": "2021-08-12T14:15:01.744507Z",
"updated_at": "2021-08-17T13:39:14.054849Z",
"sampling": "Sequential sampling",
"show_ground_truth_first": true,
"show_overlap_first": true,
"overlap_cohort_percentage": 100,
"task_data_login": null,
Expand Down
2 changes: 1 addition & 1 deletion label_studio/projects/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,7 @@ def get_queryset(self):
'total_annotations_number': 10,
'total_predictions_number': 0,
'sampling': 'Sequential sampling',
'show_ground_truth_first': True,
'annotator_evaluation_enabled': False,
'show_overlap_first': True,
'overlap_cohort_percentage': 100,
'task_data_login': 'user',
Expand Down
6 changes: 2 additions & 4 deletions label_studio/projects/functions/next_task.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ flowchart TD
B3 -- no --> B4{"LSE low-agreement path?<br/>fflag OPTIC-161<br/>agreement_threshold set<br/>user is annotator"}
B4 -- yes --> B6["Filter by agreement threshold<br/>and annotator capacity"] --> B7[Optionally prioritize by low agreement]

B4 -- no --> B8{"Evaluation mode?<br/>fflag ALL-LEAP-1825<br/>show_ground_truth_first"}
B4 -- no --> B8{"Evaluation mode?<br/>fflag ALL-LEAP-1825<br/>annotator_evaluation_enabled"}
B8 -- yes --> B7
B8 -- no --> B9[Filter: is_labeled=false] --> B7
end
Expand Down Expand Up @@ -69,9 +69,7 @@ flowchart TD

### GT-first gating
- `should_attempt_ground_truth_first(user, project)` returns true when:
- `show_ground_truth_first=True` and either no `lse_project` or `annotator_evaluation_minimum_tasks` is not set, or
- the user's completed GT-equipped tasks < `annotator_evaluation_minimum_tasks`, or
- minimum tasks reached but the user's GT agreement score is missing or below `annotator_evaluation_minimum_score` (percent).
- `annotator_evaluation_enabled=True` and `annotator_evaluation_onboarding_tasks > 0` and the user's completed GT-equipped tasks < `annotator_evaluation_onboarding_tasks`.
- Otherwise returns false (GT-first disabled; proceed via low-agreement/overlap/sampling).

## Queue labels appended to response
Expand Down
70 changes: 38 additions & 32 deletions label_studio/projects/functions/next_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,14 @@


# Hook for GT-first gating (Enterprise can override via settings)
def _oss_should_attempt_gt_first(user: User, project: Project) -> bool:
    """Open-source default for the GT-first gating hook.

    Reports the truthiness of ``project.show_ground_truth_first``. ``user`` is
    part of the hook signature but is not consulted here, so no per-user
    onboarding gates are applied.
    """
    gt_first_enabled = project.show_ground_truth_first
    return True if gt_first_enabled else False
def _lso_should_attempt_gt_first(user: User, project: Project) -> bool:
    """Open-source default for the GT-first gating hook.

    Returns ``True`` exactly when ``project.annotator_evaluation_enabled`` is
    truthy. ``user`` is accepted because the hook is called with
    ``(user, project)``, but it is not inspected: the open-source default
    applies no onboarding gates.
    """
    if project.annotator_evaluation_enabled:
        return True
    return False


get_tasks_agreement_queryset = load_func(settings.GET_TASKS_AGREEMENT_QUERYSET)
should_attempt_ground_truth_first = (
load_func(settings.SHOULD_ATTEMPT_GROUND_TRUTH_FIRST) or _oss_should_attempt_gt_first
load_func(settings.SHOULD_ATTEMPT_GROUND_TRUTH_FIRST) or _lso_should_attempt_gt_first
)


Expand Down Expand Up @@ -59,10 +59,7 @@ def _get_first_unlocked(tasks_query: QuerySet[Task], user) -> Union[Task, None]:

def _try_ground_truth(tasks: QuerySet[Task], project: Project, user: User) -> Union[Task, None]:
"""Returns task from ground truth set"""
ground_truth = Annotation.objects.filter(task=OuterRef('pk'), ground_truth=True)
not_solved_tasks_with_ground_truths = tasks.annotate(has_ground_truths=Exists(ground_truth)).filter(
has_ground_truths=True
)
not_solved_tasks_with_ground_truths = _annotate_has_ground_truths(tasks).filter(has_ground_truths=True)
if not_solved_tasks_with_ground_truths.exists():
if project.sampling == project.SEQUENCE:
return _get_first_unlocked(not_solved_tasks_with_ground_truths, user)
Expand All @@ -78,13 +75,15 @@ def _try_tasks_with_overlap(tasks: QuerySet[Task]) -> Tuple[Union[Task, None], Q
return None, tasks.filter(overlap=1)


def _try_breadth_first(tasks: QuerySet[Task], user: User, project: Project) -> Union[Task, None]:
def _try_breadth_first(
tasks: QuerySet[Task], user: User, project: Project, attempt_gt_first: bool = False
) -> Union[Task, None]:
"""Try to find tasks with maximum amount of annotations, since we are trying to label tasks as fast as possible"""

# Exclude ground truth annotations from the count when not in onboarding mode
# Exclude ground truth annotations from the count when not in onboarding window
# to prevent GT tasks from being prioritized via breadth-first logic
annotation_filter = ~Q(annotations__completed_by=user)
if not project.show_ground_truth_first:
if not attempt_gt_first:
annotation_filter &= ~Q(annotations__ground_truth=True)

tasks = tasks.annotate(annotations_count=Count('annotations', filter=annotation_filter))
Expand Down Expand Up @@ -158,13 +157,18 @@ def _try_uncertainty_sampling(
return next_task


def _annotate_has_ground_truths(tasks: QuerySet[Task]) -> QuerySet[Task]:
    """Annotate every task in ``tasks`` with a ``has_ground_truths`` flag.

    The flag is an ``Exists`` subquery over annotations that reference the
    task and have ``ground_truth=True``. The queryset is only annotated here;
    filtering on the flag is left to the caller.
    """
    return tasks.annotate(
        has_ground_truths=Exists(Annotation.objects.filter(task=OuterRef('pk'), ground_truth=True))
    )


def get_not_solved_tasks_qs(
user: User,
project: Project,
prepared_tasks: QuerySet[Task],
assigned_flag: Union[bool, None],
queue_info: str,
allow_gt_first: bool,
attempt_gt_first: bool,
) -> Tuple[QuerySet[Task], List[int], str, bool]:
user_solved_tasks_array = user.annotations.filter(project=project, task__isnull=False)
user_solved_tasks_array = user_solved_tasks_array.distinct().values_list('task__pk', flat=True)
Expand All @@ -188,7 +192,6 @@ def get_not_solved_tasks_qs(
and get_tasks_agreement_queryset
and user.is_project_annotator(project)
):
# Onboarding mode (GT-first) should keep GT tasks eligible regardless of is_labeled/agreement
qs = get_tasks_agreement_queryset(not_solved_tasks)
qs = qs.annotate(annotators=Count('annotations__completed_by', distinct=True))

Expand All @@ -197,13 +200,10 @@ def get_not_solved_tasks_qs(
)
capacity_pred = Q(annotators__lt=F('overlap') + (lse_project.max_additional_annotators_assignable or 0))

if project.show_ground_truth_first:
gt_subq = Annotation.objects.filter(task=OuterRef('pk'), ground_truth=True)
qs = qs.annotate(has_ground_truths=Exists(gt_subq))
# Keep all GT tasks + apply low-agreement+capacity to the rest. For sure, we can do:
# - if user.solved_tasks_array.count < lse_project.annotator_evaluation_minimum_tasks
# - else, apply low-agreement+capacity to the rest (maybe performance will be better)
# but it's a question - what is better here. This version is simpler at least from the code perspective.
if project.annotator_evaluation_enabled:
# Include ground truth tasks in the query if annotator evaluation is enabled
qs = _annotate_has_ground_truths(qs)
# Keep all GT tasks + apply low-agreement+capacity to the rest.
not_solved_tasks = qs.filter(Q(has_ground_truths=True) | (low_agreement_pred & capacity_pred))
else:
not_solved_tasks = qs.filter(low_agreement_pred & capacity_pred)
Expand All @@ -212,9 +212,15 @@ def get_not_solved_tasks_qs(

# otherwise, filtering out completed tasks is sufficient
else:
# ignore tasks that are already labeled when GT-first is NOT allowed
if not allow_gt_first:
not_solved_tasks = not_solved_tasks.filter(is_labeled=False)
if not attempt_gt_first:
# Outside of onboarding window
if project.annotator_evaluation_enabled:
# Include ground truth tasks in the query if outside of onboarding window and annotator evaluation is enabled
not_solved_tasks = _annotate_has_ground_truths(not_solved_tasks)
not_solved_tasks = not_solved_tasks.filter(Q(is_labeled=False) | Q(has_ground_truths=True))
else:
# Ignore tasks that are already labeled when outside of onboarding window and annotator evaluation is not enabled
not_solved_tasks = not_solved_tasks.filter(is_labeled=False)

if not flag_set('fflag_fix_back_lsdv_4523_show_overlap_first_order_27022023_short'):
# show tasks with overlap > 1 first (unless tasks are already prioritized on agreement)
Expand Down Expand Up @@ -244,7 +250,7 @@ def get_next_task_without_dm_queue(
not_solved_tasks: QuerySet,
assigned_flag: Union[bool, None],
prioritized_low_agreement: bool,
allow_gt_first: bool,
attempt_gt_first: bool,
) -> Tuple[Union[Task, None], bool, str]:
next_task = None
use_task_lock = True
Expand All @@ -265,8 +271,8 @@ def get_next_task_without_dm_queue(
use_task_lock = False
queue_info += (' & ' if queue_info else '') + 'Task lock'

# Ground truth: use precomputed gating for GT-first
if not next_task and allow_gt_first:
# Ground truth: attempt to label ground truth tasks in onboarding window
if not next_task and attempt_gt_first:
logger.debug(f'User={user} tries ground truth from prepared tasks')
next_task = _try_ground_truth(not_solved_tasks, project, user)
if next_task:
Expand All @@ -283,7 +289,7 @@ def get_next_task_without_dm_queue(
if not next_task and project.maximum_annotations > 1:
# if there are already labeled tasks, but task.overlap still < project.maximum_annotations, randomly sampling from them
logger.debug(f'User={user} tries depth first from prepared tasks')
next_task = _try_breadth_first(not_solved_tasks, user, project)
next_task = _try_breadth_first(not_solved_tasks, user, project, attempt_gt_first)
if next_task:
queue_info += (' & ' if queue_info else '') + 'Breadth first queue'

Expand Down Expand Up @@ -378,16 +384,16 @@ def get_next_task(
use_task_lock = True
queue_info = ''

# Ground truth: label GT first only during onboarding window for user (gated by min tasks and min score)
allow_gt_first = should_attempt_ground_truth_first(user, project)
# Ground truth: label GT first only during onboarding window for user (gated by onboarding task number)
attempt_gt_first = should_attempt_ground_truth_first(user, project)

not_solved_tasks, user_solved_tasks_array, queue_info, prioritized_low_agreement = get_not_solved_tasks_qs(
user, project, prepared_tasks, assigned_flag, queue_info, allow_gt_first
user, project, prepared_tasks, assigned_flag, queue_info, attempt_gt_first
)

if not dm_queue:
next_task, use_task_lock, queue_info = get_next_task_without_dm_queue(
user, project, not_solved_tasks, assigned_flag, prioritized_low_agreement, allow_gt_first
user, project, not_solved_tasks, assigned_flag, prioritized_low_agreement, attempt_gt_first
)

if flag_set('fflag_fix_back_lsdv_4523_show_overlap_first_order_27022023_short'):
Expand Down Expand Up @@ -452,7 +458,7 @@ def get_next_task(
'maximum_annotations': project.maximum_annotations,
'skip_queue': project.skip_queue,
'sampling': project.sampling,
'show_ground_truth_first': project.show_ground_truth_first,
'annotator_evaluation_enabled': project.annotator_evaluation_enabled,
'show_overlap_first': project.show_overlap_first,
'overlap_cohort_percentage': project.overlap_cohort_percentage,
'project_id': project.id,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Generated by Django 5.1.14 on 2025-12-09 21:37

from django.db import migrations, models


class Migration(migrations.Migration):
    """Add the boolean ``annotator_evaluation_enabled`` field to ``project``."""

    # Must run after the soft-delete index migration of the same app.
    dependencies = [("projects", "0033_projects_soft_delete_indexes_async")]

    operations = [
        migrations.AddField(
            model_name="project",
            name="annotator_evaluation_enabled",
            # Both the Python-side and the database-side default are False.
            field=models.BooleanField(
                verbose_name="annotator evaluation enabled",
                help_text="Enable annotator evaluation for the project",
                default=False,
                db_default=False,
            ),
        ),
    ]
10 changes: 10 additions & 0 deletions label_studio/projects/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,11 +315,21 @@ class SkipQueue(models.TextChoices):
skip_queue = models.CharField(
max_length=100, choices=SkipQueue.choices, null=True, default=SkipQueue.REQUEUE_FOR_OTHERS
)

# Deprecated in favor of annotator_evaluation_enabled
show_ground_truth_first = models.BooleanField(
_('show ground truth first'),
default=False,
help_text='Onboarding mode (true): show ground truth tasks first in the labeling stream',
)

annotator_evaluation_enabled = models.BooleanField(
_('annotator evaluation enabled'),
default=False,
db_default=False,
help_text='Enable annotator evaluation for the project',
)

show_overlap_first = models.BooleanField(_('show overlap first'), default=False)
overlap_cohort_percentage = models.IntegerField(_('overlap_cohort_percentage'), default=100)

Expand Down
3 changes: 3 additions & 0 deletions label_studio/projects/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import bleach
from constants import SAFE_HTML_ATTRIBUTES, SAFE_HTML_TAGS
from django.db.models import Q
from drf_spectacular.utils import extend_schema_serializer
from fsm.serializer_fields import FSMStateField
from label_studio_sdk.label_interface import LabelInterface
from label_studio_sdk.label_interface.control_tags import (
Expand Down Expand Up @@ -43,6 +44,7 @@ def __call__(self, serializer_field):
return serializer_field.context.get('created_by')


@extend_schema_serializer(deprecate_fields=['show_ground_truth_first'])
class ProjectSerializer(FlexFieldsModelSerializer):
"""Serializer get numbers from project queryset annotation,
make sure, that you use correct one(Project.objects.with_counts())
Expand Down Expand Up @@ -236,6 +238,7 @@ class Meta:
'total_predictions_number',
'sampling',
'show_ground_truth_first',
'annotator_evaluation_enabled',
'show_overlap_first',
'overlap_cohort_percentage',
'task_data_login',
Expand Down
6 changes: 2 additions & 4 deletions label_studio/tasks/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -289,10 +289,8 @@ def has_lock(self, user=None):
"""
from projects.functions.next_task import get_next_task_logging_level

if self.project.show_ground_truth_first:
# in show_ground_truth_first mode(onboarding)
# we ignore overlap setting for ground_truth tasks
# https://humansignal.atlassian.net/browse/LEAP-1963
if self.project.annotator_evaluation_enabled:
# In annotator evaluation mode, ignore overlap setting for ground truth tasks
if self.annotations.filter(ground_truth=True).exists():
return False

Expand Down
2 changes: 1 addition & 1 deletion label_studio/tests/data_manager/columns.tavern.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ stages:
"start_training_on_annotation_update": false, "show_collab_predictions": true, "num_tasks_with_annotations": null,
"task_number": null, "useful_annotation_number": null, "ground_truth_number": null, "skipped_annotations_number": null,
"total_annotations_number": null, "total_predictions_number": null, "sampling": "Sequential sampling",
"show_ground_truth_first": false, "show_overlap_first": false, "overlap_cohort_percentage": 100,
"show_ground_truth_first": false, "annotator_evaluation_enabled": false, "show_overlap_first": false, "overlap_cohort_percentage": 100,
"task_data_login": null, "task_data_password": null,
"control_weights": {"label": {"overall": 1.0, "type": "Choices", "labels": {"pos": 1.0, "neg": 1.0}}},
"parsed_label_config": {
Expand Down
8 changes: 4 additions & 4 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ dependencies = [
"tldextract (>=5.1.3)",
"uuid-utils (>=0.11.0,<1.0.0)",
## HumanSignal repo dependencies :start
"label-studio-sdk @ https://github.com/HumanSignal/label-studio-sdk/archive/58970c14d2c1683e5093eae848a8265cbbc4acac.zip",
"label-studio-sdk @ https://github.com/HumanSignal/label-studio-sdk/archive/8ce1b4f80f12780da5d07184e4bea25a5c05fe96.zip",
## HumanSignal repo dependencies :end
]

Expand Down
Loading
Loading