Skip to content

Commit

Permalink
chore: Revert "feat: OPTIC-139: [M1] Workflow Automation: Task Assign…
Browse files Browse the repository at this point in the history
…ment" (#5300)

Revert "feat: OPTIC-139: [M1] Workflow Automation: Task Assignment"
  • Loading branch information
bmartel committed Jan 17, 2024
1 parent 01ab026 commit c5e6bd5
Show file tree
Hide file tree
Showing 5 changed files with 21 additions and 130 deletions.
2 changes: 0 additions & 2 deletions label_studio/core/settings/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -528,8 +528,6 @@
USER_LOGIN_FORM = 'users.forms.LoginForm'
PROJECT_MIXIN = 'projects.mixins.ProjectMixin'
TASK_MIXIN = 'tasks.mixins.TaskMixin'
LSE_PROJECT = None
GET_TASKS_AGREEMENT_QUERYSET = None
ANNOTATION_MIXIN = 'tasks.mixins.AnnotationMixin'
ORGANIZATION_MIXIN = 'organizations.mixins.OrganizationMixin'
USER_MIXIN = 'users.mixins.UserMixin'
Expand Down
67 changes: 2 additions & 65 deletions label_studio/data_manager/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,22 +11,7 @@
from django.contrib.postgres.aggregates import ArrayAgg
from django.contrib.postgres.fields.jsonb import KeyTextTransform
from django.db import models
from django.db.models import (
Aggregate,
Avg,
Case,
DateTimeField,
Exists,
F,
FloatField,
IntegerField,
OuterRef,
Q,
Subquery,
TextField,
Value,
When,
)
from django.db.models import Aggregate, Avg, Case, Exists, F, FloatField, OuterRef, Q, Subquery, TextField, Value, When
from django.db.models.functions import Cast, Coalesce, Concat
from pydantic import BaseModel

Expand Down Expand Up @@ -513,56 +498,8 @@ def __init__(self, expression, distinct=False, output_field=None, **extra):
def annotate_completed_at(queryset):
from tasks.models import Annotation

LseProject = load_func(settings.LSE_PROJECT)
get_tasks_agreement_queryset = load_func(settings.GET_TASKS_AGREEMENT_QUERYSET)

newest = Annotation.objects.filter(task=OuterRef('pk')).order_by('-id')[:1]
if (
get_tasks_agreement_queryset
and LseProject
and queryset
and LseProject.objects.filter(project_id=queryset[0].project_id).exists()
):
queryset = get_tasks_agreement_queryset(queryset)

# Subquery to get the agreement_threshold for each project
agreement_threshold_subquery = Subquery(
LseProject.objects.filter(project_id=OuterRef('project_id')).values('agreement_threshold')[:1],
output_field=FloatField(),
)

# Subquery for max_additional_annotators_assignable + overlap
max_annotators_subquery = Subquery(
LseProject.objects.filter(project_id=OuterRef('project_id'))
.annotate(total_max_annotators=F('max_additional_annotators_assignable') + OuterRef('overlap'))
.values('total_max_annotators')[:1],
output_field=IntegerField(),
)

# Subquery to get the latest Annotation for each task
agreement_threshold_exists_subquery = Exists(
LseProject.objects.filter(project_id=OuterRef('project_id'), agreement_threshold__isnull=False)
)

completed_at_case = Case(
When(
# Check if agreement_threshold is not set
~agreement_threshold_exists_subquery & Q(is_labeled=True),
then=Subquery(newest.values('created_at')),
),
When(
# If agreement_threshold is set, evaluate all conditions
agreement_threshold_exists_subquery
& Q(is_labeled=True)
& (Q(_agreement__gte=agreement_threshold_subquery) | Q(annotation_count__gte=max_annotators_subquery)),
then=Subquery(newest.values('created_at')),
),
default=Value(None),
output_field=DateTimeField(),
)
return queryset.annotate(completed_at=completed_at_case)
else:
return queryset.annotate(completed_at=Case(When(is_labeled=True, then=Subquery(newest.values('created_at')))))
return queryset.annotate(completed_at=Case(When(is_labeled=True, then=Subquery(newest.values('created_at')))))


def annotate_storage_filename(queryset: TaskQuerySet) -> TaskQuerySet:
Expand Down
14 changes: 10 additions & 4 deletions label_studio/projects/functions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,19 @@ def annotate_finished_task_number(queryset):

def annotate_total_predictions_number(queryset):
if flag_set(
'fflag_perf_back_lsdv_4695_update_prediction_query_to_use_direct_project_relation',
'fflag_fix_back_lsdv_4719_improve_performance_of_project_annotations',
user='auto',
):
predictions = Prediction.objects.filter(project=OuterRef('id')).values('id')
if flag_set(
'fflag_perf_back_lsdv_4695_update_prediction_query_to_use_direct_project_relation',
user='auto',
):
predictions = Prediction.objects.filter(project=OuterRef('id')).values('id')
else:
predictions = Prediction.objects.filter(task__project=OuterRef('id')).values('id')
return queryset.annotate(total_predictions_number=SQCount(predictions))
else:
predictions = Prediction.objects.filter(task__project=OuterRef('id')).values('id')
return queryset.annotate(total_predictions_number=SQCount(predictions))
return queryset.annotate(total_predictions_number=Count('tasks__predictions', distinct=True))


def annotate_total_annotations_number(queryset):
Expand Down
43 changes: 7 additions & 36 deletions label_studio/projects/functions/next_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,15 @@
from collections import Counter

from core.feature_flags import flag_set
from core.utils.common import conditional_atomic, db_is_not_sqlite, load_func
from core.utils.common import conditional_atomic, db_is_not_sqlite
from django.conf import settings
from django.db.models import BooleanField, Case, Count, Exists, F, Max, OuterRef, Q, Value, When
from django.db.models import BooleanField, Case, Count, Exists, Max, OuterRef, Q, Value, When
from django.db.models.fields import DecimalField
from projects.functions.stream_history import add_stream_history
from tasks.models import Annotation, Task

logger = logging.getLogger(__name__)

get_tasks_agreement_queryset = load_func(settings.GET_TASKS_AGREEMENT_QUERYSET)


def get_next_task_logging_level(user):
level = logging.DEBUG
Expand Down Expand Up @@ -143,35 +141,13 @@ def get_not_solved_tasks_qs(user, project, prepared_tasks, assigned_flag, queue_
user_postponed_tasks = postponed_drafts.distinct().values_list('task__pk', flat=True)
not_solved_tasks = not_solved_tasks.exclude(pk__in=user_postponed_tasks)

prioritized_on_agreement = False
# if annotator is assigned for tasks, he must solve it regardless of is_labeled=True
# if annotator is assigned for tasks, he must to solve it regardless of is_labeled=True
if not assigned_flag:
# include tasks that have been completed if their agreement is not at threshold if threshold setting is set
lse_project = getattr(project, 'lse_project', None)
if (
lse_project
and lse_project.agreement_threshold is not None
and get_tasks_agreement_queryset
and user.is_annotator
):
not_solved_tasks = (
get_tasks_agreement_queryset(not_solved_tasks)
# include tasks that are not labeled or are labeled but fall below the agreement threshold
.filter(
Q(_agreement__lt=lse_project.agreement_threshold, is_labeled=True) | Q(is_labeled=False)
).annotate(annotators=Count('annotations__completed_by', distinct=True))
# skip tasks that have been annotated by the maximum additional number of annotators
.filter(annotators__lt=F('overlap') + lse_project.max_additional_annotators_assignable)
)
prioritized_on_agreement, not_solved_tasks = _prioritize_low_agreement_tasks(not_solved_tasks, lse_project)

# otherwise, filtering out completed tasks is sufficient
else:
not_solved_tasks = not_solved_tasks.filter(is_labeled=False)
not_solved_tasks = not_solved_tasks.filter(is_labeled=False)

if not flag_set('fflag_fix_back_lsdv_4523_show_overlap_first_order_27022023_short'):
# show tasks with overlap > 1 first (unless tasks are already prioritized on agreement)
if project.show_overlap_first and not prioritized_on_agreement:
# show tasks with overlap > 1 first
if project.show_overlap_first:
# don't output anything - just filter tasks with overlap
logger.debug(f'User={user} tries overlap first from prepared tasks')
_, not_solved_tasks = _try_tasks_with_overlap(not_solved_tasks)
Expand All @@ -180,11 +156,6 @@ def get_not_solved_tasks_qs(user, project, prepared_tasks, assigned_flag, queue_
return not_solved_tasks, user_solved_tasks_array, queue_info


def _prioritize_low_agreement_tasks(tasks, lse_project):
low_agreement_tasks = tasks.filter(_agreement__lt=lse_project.agreement_threshold, is_labeled=True)
return (True, low_agreement_tasks) if low_agreement_tasks else (False, tasks)


def get_next_task_without_dm_queue(user, project, not_solved_tasks, assigned_flag):
next_task = None
use_task_lock = True
Expand All @@ -211,7 +182,7 @@ def get_next_task_without_dm_queue(user, project, not_solved_tasks, assigned_fla
queue_info += (' & ' if queue_info else '') + 'Ground truth queue'

if not next_task and project.maximum_annotations > 1:
# if there are any tasks in progress (with maximum number of annotations), randomly sampling from them
# if there any tasks in progress (with maximum number of annotations), randomly sampling from them
logger.debug(f'User={user} tries depth first from prepared tasks')
next_task = _try_breadth_first(not_solved_tasks, user)
if next_task:
Expand Down
25 changes: 2 additions & 23 deletions label_studio/tasks/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,8 +220,7 @@ def has_lock(self, user=None):
num_locks = self.num_locks_user(user=user)
num_annotations = self.annotations.exclude(q | Q(ground_truth=True)).count()
num = num_locks + num_annotations

if num > self.overlap_with_agreement_threshold(num, num_locks):
if num > self.overlap:
logger.error(
f'Num takes={num} > overlap={self.overlap} for task={self.id}, '
f"skipped mode {self.project.skip_queue} - it's a bug",
Expand All @@ -236,8 +235,7 @@ def has_lock(self, user=None):
self.update_is_labeled()
if self.is_labeled is True:
self.save(update_fields=['is_labeled'])

result = bool(num >= self.overlap_with_agreement_threshold(num, num_locks))
result = bool(num >= self.overlap)
logger.log(
get_next_task_logging_level(user),
f'Task {self} locked: {result}; num_locks: {num_locks} num_annotations: {num_annotations} '
Expand All @@ -249,25 +247,6 @@ def has_lock(self, user=None):
def num_locks(self):
return self.locks.filter(expire_at__gt=now()).count()

def overlap_with_agreement_threshold(self, num, num_locks):
try:
from stats.models import get_task_agreement
except (ModuleNotFoundError, ImportError):
pass

# Limit to one extra annotator at a time when the task is under the threshold and meets the overlap criteria,
# regardless of the max_additional_annotators_assignable setting. This ensures recalculating agreement after
# each annotation and prevents concurrent annotations from dropping the agreement below the threshold.
if hasattr(self.project, 'lse_project') and self.project.lse_project.agreement_threshold is not None:
agreement = get_task_agreement(self)
if agreement is not None and agreement < self.project.lse_project.agreement_threshold:
return (
min(self.overlap + self.project.lse_project.max_additional_annotators_assignable, num + 1)
if num_locks == 0
else num
)
return self.overlap

def num_locks_user(self, user):
return self.locks.filter(expire_at__gt=now()).exclude(user=user).count()

Expand Down

0 comments on commit c5e6bd5

Please sign in to comment.