diff --git a/label_studio/core/settings/base.py b/label_studio/core/settings/base.py index 0e32faad678..6d88089a77a 100644 --- a/label_studio/core/settings/base.py +++ b/label_studio/core/settings/base.py @@ -528,8 +528,6 @@ USER_LOGIN_FORM = 'users.forms.LoginForm' PROJECT_MIXIN = 'projects.mixins.ProjectMixin' TASK_MIXIN = 'tasks.mixins.TaskMixin' -LSE_PROJECT = None -GET_TASKS_AGREEMENT_QUERYSET = None ANNOTATION_MIXIN = 'tasks.mixins.AnnotationMixin' ORGANIZATION_MIXIN = 'organizations.mixins.OrganizationMixin' USER_MIXIN = 'users.mixins.UserMixin' diff --git a/label_studio/data_manager/managers.py b/label_studio/data_manager/managers.py index 2076eb35044..8673d45c50f 100644 --- a/label_studio/data_manager/managers.py +++ b/label_studio/data_manager/managers.py @@ -11,22 +11,7 @@ from django.contrib.postgres.aggregates import ArrayAgg from django.contrib.postgres.fields.jsonb import KeyTextTransform from django.db import models -from django.db.models import ( - Aggregate, - Avg, - Case, - DateTimeField, - Exists, - F, - FloatField, - IntegerField, - OuterRef, - Q, - Subquery, - TextField, - Value, - When, -) +from django.db.models import Aggregate, Avg, Case, Exists, F, FloatField, OuterRef, Q, Subquery, TextField, Value, When from django.db.models.functions import Cast, Coalesce, Concat from pydantic import BaseModel @@ -513,56 +498,8 @@ def __init__(self, expression, distinct=False, output_field=None, **extra): def annotate_completed_at(queryset): from tasks.models import Annotation - LseProject = load_func(settings.LSE_PROJECT) - get_tasks_agreement_queryset = load_func(settings.GET_TASKS_AGREEMENT_QUERYSET) - newest = Annotation.objects.filter(task=OuterRef('pk')).order_by('-id')[:1] - if ( - get_tasks_agreement_queryset - and LseProject - and queryset - and LseProject.objects.filter(project_id=queryset[0].project_id).exists() - ): - queryset = get_tasks_agreement_queryset(queryset) - - # Subquery to get the agreement_threshold for each project - agreement_threshold_subquery = Subquery( - LseProject.objects.filter(project_id=OuterRef('project_id')).values('agreement_threshold')[:1], - output_field=FloatField(), - ) - - # Subquery for max_additional_annotators_assignable + overlap - max_annotators_subquery = Subquery( - LseProject.objects.filter(project_id=OuterRef('project_id')) - .annotate(total_max_annotators=F('max_additional_annotators_assignable') + OuterRef('overlap')) - .values('total_max_annotators')[:1], - output_field=IntegerField(), - ) - - # Subquery to get the latest Annotation for each task - agreement_threshold_exists_subquery = Exists( - LseProject.objects.filter(project_id=OuterRef('project_id'), agreement_threshold__isnull=False) - ) - - completed_at_case = Case( - When( - # Check if agreement_threshold is not set - ~agreement_threshold_exists_subquery & Q(is_labeled=True), - then=Subquery(newest.values('created_at')), - ), - When( - # If agreement_threshold is set, evaluate all conditions - agreement_threshold_exists_subquery - & Q(is_labeled=True) - & (Q(_agreement__gte=agreement_threshold_subquery) | Q(annotation_count__gte=max_annotators_subquery)), - then=Subquery(newest.values('created_at')), - ), - default=Value(None), - output_field=DateTimeField(), - ) - return queryset.annotate(completed_at=completed_at_case) - else: - return queryset.annotate(completed_at=Case(When(is_labeled=True, then=Subquery(newest.values('created_at'))))) + return queryset.annotate(completed_at=Case(When(is_labeled=True, then=Subquery(newest.values('created_at'))))) def annotate_storage_filename(queryset: TaskQuerySet) -> TaskQuerySet: diff --git a/label_studio/projects/functions/__init__.py b/label_studio/projects/functions/__init__.py index 0b87d954d6a..e1d2a2274c8 100644 --- a/label_studio/projects/functions/__init__.py +++ b/label_studio/projects/functions/__init__.py @@ -22,13 +22,19 @@ def annotate_finished_task_number(queryset): def annotate_total_predictions_number(queryset): if flag_set( - 'fflag_perf_back_lsdv_4695_update_prediction_query_to_use_direct_project_relation', + 'fflag_fix_back_lsdv_4719_improve_performance_of_project_annotations', user='auto', ): - predictions = Prediction.objects.filter(project=OuterRef('id')).values('id') + if flag_set( + 'fflag_perf_back_lsdv_4695_update_prediction_query_to_use_direct_project_relation', + user='auto', + ): + predictions = Prediction.objects.filter(project=OuterRef('id')).values('id') + else: + predictions = Prediction.objects.filter(task__project=OuterRef('id')).values('id') + return queryset.annotate(total_predictions_number=SQCount(predictions)) else: - predictions = Prediction.objects.filter(task__project=OuterRef('id')).values('id') - return queryset.annotate(total_predictions_number=SQCount(predictions)) + return queryset.annotate(total_predictions_number=Count('tasks__predictions', distinct=True)) def annotate_total_annotations_number(queryset): diff --git a/label_studio/projects/functions/next_task.py b/label_studio/projects/functions/next_task.py index d57817a53f7..5f6462d7374 100644 --- a/label_studio/projects/functions/next_task.py +++ b/label_studio/projects/functions/next_task.py @@ -2,17 +2,15 @@ from collections import Counter from core.feature_flags import flag_set -from core.utils.common import conditional_atomic, db_is_not_sqlite, load_func +from core.utils.common import conditional_atomic, db_is_not_sqlite from django.conf import settings -from django.db.models import BooleanField, Case, Count, Exists, F, Max, OuterRef, Q, Value, When +from django.db.models import BooleanField, Case, Count, Exists, Max, OuterRef, Q, Value, When from django.db.models.fields import DecimalField from projects.functions.stream_history import add_stream_history from tasks.models import Annotation, Task logger = logging.getLogger(__name__) -get_tasks_agreement_queryset = load_func(settings.GET_TASKS_AGREEMENT_QUERYSET) - def get_next_task_logging_level(user): level = logging.DEBUG @@ -143,35 +141,13 @@ def get_not_solved_tasks_qs(user, project, prepared_tasks, assigned_flag, queue_ user_postponed_tasks = postponed_drafts.distinct().values_list('task__pk', flat=True) not_solved_tasks = not_solved_tasks.exclude(pk__in=user_postponed_tasks) - prioritized_on_agreement = False - # if annotator is assigned for tasks, he must solve it regardless of is_labeled=True + # if annotator is assigned for tasks, he must to solve it regardless of is_labeled=True if not assigned_flag: - # include tasks that have been completed if their agreement is not at threshold if threshold setting is set - lse_project = getattr(project, 'lse_project', None) - if ( - lse_project - and lse_project.agreement_threshold is not None - and get_tasks_agreement_queryset - and user.is_annotator - ): - not_solved_tasks = ( - get_tasks_agreement_queryset(not_solved_tasks) - # include tasks that are not labeled or are labeled but fall below the agreement threshold - .filter( - Q(_agreement__lt=lse_project.agreement_threshold, is_labeled=True) | Q(is_labeled=False) - ).annotate(annotators=Count('annotations__completed_by', distinct=True)) - # skip tasks that have been annotated by the maximum additional number of annotators - .filter(annotators__lt=F('overlap') + lse_project.max_additional_annotators_assignable) - ) - prioritized_on_agreement, not_solved_tasks = _prioritize_low_agreement_tasks(not_solved_tasks, lse_project) - - # otherwise, filtering out completed tasks is sufficient - else: - not_solved_tasks = not_solved_tasks.filter(is_labeled=False) + not_solved_tasks = not_solved_tasks.filter(is_labeled=False) if not flag_set('fflag_fix_back_lsdv_4523_show_overlap_first_order_27022023_short'): - # show tasks with overlap > 1 first (unless tasks are already prioritized on agreement) - if project.show_overlap_first and not prioritized_on_agreement: + # show tasks with overlap > 1 first + if project.show_overlap_first: # don't output anything - just filter tasks with overlap logger.debug(f'User={user} tries overlap first from prepared tasks') _, not_solved_tasks = _try_tasks_with_overlap(not_solved_tasks) @@ -180,11 +156,6 @@ def get_not_solved_tasks_qs(user, project, prepared_tasks, assigned_flag, queue_ return not_solved_tasks, user_solved_tasks_array, queue_info -def _prioritize_low_agreement_tasks(tasks, lse_project): - low_agreement_tasks = tasks.filter(_agreement__lt=lse_project.agreement_threshold, is_labeled=True) - return (True, low_agreement_tasks) if low_agreement_tasks else (False, tasks) - - def get_next_task_without_dm_queue(user, project, not_solved_tasks, assigned_flag): next_task = None use_task_lock = True @@ -211,7 +182,7 @@ def get_next_task_without_dm_queue(user, project, not_solved_tasks, assigned_fla queue_info += (' & ' if queue_info else '') + 'Ground truth queue' if not next_task and project.maximum_annotations > 1: - # if there are any tasks in progress (with maximum number of annotations), randomly sampling from them + # if there any tasks in progress (with maximum number of annotations), randomly sampling from them logger.debug(f'User={user} tries depth first from prepared tasks') next_task = _try_breadth_first(not_solved_tasks, user) if next_task: diff --git a/label_studio/tasks/models.py b/label_studio/tasks/models.py index ae951293227..fac91970177 100644 --- a/label_studio/tasks/models.py +++ b/label_studio/tasks/models.py @@ -220,8 +220,7 @@ def has_lock(self, user=None): num_locks = self.num_locks_user(user=user) num_annotations = self.annotations.exclude(q | Q(ground_truth=True)).count() num = num_locks + num_annotations - - if num > self.overlap_with_agreement_threshold(num, num_locks): + if num > self.overlap: logger.error( f'Num takes={num} > overlap={self.overlap} for task={self.id}, ' f"skipped mode {self.project.skip_queue} - it's a bug", @@ -236,8 +235,7 @@ def has_lock(self, user=None): self.update_is_labeled() if self.is_labeled is True: self.save(update_fields=['is_labeled']) - - result = bool(num >= self.overlap_with_agreement_threshold(num, num_locks)) + result = bool(num >= self.overlap) logger.log( get_next_task_logging_level(user), f'Task {self} locked: {result}; num_locks: {num_locks} num_annotations: {num_annotations} ' @@ -249,25 +247,6 @@ def has_lock(self, user=None): def num_locks(self): return self.locks.filter(expire_at__gt=now()).count() - def overlap_with_agreement_threshold(self, num, num_locks): - try: - from stats.models import get_task_agreement - except (ModuleNotFoundError, ImportError): - pass - - # Limit to one extra annotator at a time when the task is under the threshold and meets the overlap criteria, - # regardless of the max_additional_annotators_assignable setting. This ensures recalculating agreement after - # each annotation and prevents concurrent annotations from dropping the agreement below the threshold. - if hasattr(self.project, 'lse_project') and self.project.lse_project.agreement_threshold is not None: - agreement = get_task_agreement(self) - if agreement is not None and agreement < self.project.lse_project.agreement_threshold: - return ( - min(self.overlap + self.project.lse_project.max_additional_annotators_assignable, num + 1) - if num_locks == 0 - else num - ) - return self.overlap - def num_locks_user(self, user): return self.locks.filter(expire_at__gt=now()).exclude(user=user).count()