Skip to content

Commit

Permalink
Validate annotators emails when import previous exports (HumanSignal#…
Browse files Browse the repository at this point in the history
…1184)

* Validate annotators emails when import previous exports

* Fix default completed_by, add test samples

* Remove unused code

* Address comments

Co-authored-by: nik <[email protected]>
  • Loading branch information
niklub and nik committed Jul 14, 2021
1 parent 26abda7 commit 26aed24
Show file tree
Hide file tree
Showing 4 changed files with 165 additions and 32 deletions.
69 changes: 37 additions & 32 deletions label_studio/tasks/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,17 +217,29 @@ def to_internal_value(self, data):

return ret

@staticmethod
def get_completed_by_id(annotation, default=None):
    """Extract the annotator's user id from an annotation's ``completed_by``.

    ``completed_by`` may be given either as a plain integer user id or as a
    user dict carrying an ``id`` key.

    :param annotation: annotation dict, read via ``annotation.get('completed_by')``
    :param default: value returned whenever no usable user id is found
    :return: the user id, or ``default``
    """
    completed_by = annotation.get('completed_by', None)

    # bool is a subclass of int: without this guard a JSON ``true`` would be
    # returned as an id and compare equal to the user with id 1.
    if isinstance(completed_by, bool):
        return default

    # user id as is (a falsy 0 is treated as "not set", as before)
    if isinstance(completed_by, int) and completed_by:
        return completed_by

    # user dict: fall back to ``default`` when it carries no id, the same way
    # an empty dict or a missing ``completed_by`` already does
    if isinstance(completed_by, dict) and completed_by:
        user_id = completed_by.get('id')
        return default if user_id is None else user_id

    return default
def _insert_valid_completed_by_id_or_raise(self, annotations, members_email_to_id, members_ids, default_user):
    """Replace ``completed_by`` in every annotation with a validated ``completed_by_id``.

    Each annotation dict is modified in place: on success ``completed_by_id``
    is set and the original ``completed_by`` key is removed.

    :param annotations: list of annotation dicts of a single task
    :param members_email_to_id: mapping of member email -> user id
    :param members_ids: collection of known member user ids
    :param default_user: user whose ``id`` is used when ``completed_by`` is absent
    :raises ValidationError: if ``completed_by`` is a dict without ``email``,
        names an unknown email or user id, or has an unsupported type
    """
    for annotation in annotations:
        completed_by = annotation.get('completed_by')

        if completed_by is None:
            # no completed_by info found - assign to the fallback user
            completed_by_id = default_user.id

        elif isinstance(completed_by, dict):
            # resolve annotators by email
            if 'email' not in completed_by:
                raise ValidationError("It's expected to have 'email' field in 'completed_by' data in annotations")
            email = completed_by['email']
            if email not in members_email_to_id:
                raise ValidationError(f"Unknown annotator's email {email}")
            # overwrite with an actual member ID
            completed_by_id = members_email_to_id[email]

        elif isinstance(completed_by, int) and not isinstance(completed_by, bool):
            # old style annotators specification - try to find them by ID.
            # The membership test must live inside the branch: as part of the
            # branch condition it made the error below unreachable and let
            # unknown IDs through without any ``completed_by_id``.
            if completed_by not in members_ids:
                raise ValidationError(f"Unknown annotator's ID {completed_by}")
            completed_by_id = completed_by

        else:
            # any other type (str, list, bool, ...) cannot be resolved; fail
            # here instead of leaving ``completed_by_id`` unset for the caller
            raise ValidationError(
                f"Unexpected 'completed_by' value {completed_by!r} in annotations: "
                f"expected a dict with 'email' field or an integer user ID")

        annotation['completed_by_id'] = completed_by_id
        annotation.pop('completed_by', None)

@retry_database_locked()
def create(self, validated_data):
Expand All @@ -238,28 +250,25 @@ def create(self, validated_data):
user = self.context.get('user', None)
project = self.context.get('project')

organization = user.active_organization \
if not project.created_by.active_organization else project.created_by.active_organization
members_email_to_id = dict(organization.members.values_list('user__email', 'user__id'))
members_ids = set(members_email_to_id.values())
logger.debug(f"{len(members_email_to_id)} members found in organization {organization}")

# to be sure we add tasks with annotations at the same time
with transaction.atomic():

# extract annotations and predictions
task_annotations, task_predictions = [], []
for task in validated_tasks:
task_annotations.append(task.pop('annotations', []))
task_predictions.append(task.pop('predictions', []))

# check annotator permissions for completed by
organization = user.active_organization \
if not project.created_by.active_organization else project.created_by.active_organization
project_user_ids = organization.members.values_list('user__id', flat=True)
annotator_ids = set()
for annotations in task_annotations:
for annotation in annotations:
annotator_ids.add(self.get_completed_by_id(annotation))

for i in annotator_ids:
if i not in project_user_ids and i is not None:
raise ValidationError(f'Annotations with "completed_by"={i} are produced by annotator '
f'who is not allowed for this project as invited annotator or team member')
annotations = task.pop('annotations', [])
# insert a valid "completed_by_id" by existing member
self._insert_valid_completed_by_id_or_raise(
annotations, members_email_to_id, members_ids, user or project.created_by)
predictions = task.pop('predictions', [])
task_annotations.append(annotations)
task_predictions.append(predictions)

# add tasks first
for task in validated_tasks:
Expand Down Expand Up @@ -292,13 +301,9 @@ def create(self, validated_data):
if 'ground_truth' in annotation:
ground_truth = annotation.pop('ground_truth', True)

# get user id
completed_by_id = self.get_completed_by_id(annotation, default=user.id if user else None)
annotation.pop('completed_by', None)

db_annotations.append(Annotation(task=self.db_tasks[i],
ground_truth=ground_truth,
completed_by_id=completed_by_id,
completed_by_id=annotation['completed_by_id'],
result=annotation['result']))

# add predictions
Expand Down
68 changes: 68 additions & 0 deletions label_studio/tests/data_import.tavern.yml
Original file line number Diff line number Diff line change
Expand Up @@ -350,3 +350,71 @@ stages:
deleted: 2
status_code: 200

---
# Tavern test: importing a previously exported task whose annotation carries a
# "completed_by" user dict. The import is attempted twice: first it must be
# rejected with a validation error about the annotator's email, then it must
# succeed after an additional user has been signed up.
test_name: import_previous_export
strict: false
marks:
- usefixtures:
- django_live_url
stages:
# Reuse the shared "signup" stage (referenced by id).
- id: signup
type: ref
# Create an image classification project; its id is saved as "project_pk".
- name: stage
request:
data:
label_config: <View><Image name="image" value="$image"/><Choices name="label" toName="image"><Choice
value="Cat"/><Choice value="Dog"/></Choices></View>
title: Image Classification Project
method: POST
url: '{django_live_url}/api/projects'
response:
save:
json:
project_pk: id
status_code: 201
# First import attempt: expected to fail with HTTP 400 and an
# "Unknown annotator's email" validation error.
- name: stage
request:
files:
json: tests/test_suites/samples/previous_export.json
headers:
content-type: multipart/form-data
method: POST
url: '{django_live_url}/api/projects/{project_pk}/import'
response:
status_code: 400
json:
validation_errors:
non_field_errors:
- "Unknown annotator's email [email protected]"
# Now create the missing user and try the import again.
- id: logout
type: ref
# Sign up another account (presumably the annotator referenced by the export
# file - the e-mail values are redacted in this view, so this cannot be confirmed here).
- id: signup
name: Sign up
request:
url: "{django_live_url}/user/signup"
data:
email: [email protected]
password: 12345678
method: POST
response:
status_code: 302
# Reuse the shared "login" stage (referenced by id).
- id: login
type: ref
# Second import attempt with the same file: expected to succeed with HTTP 201
# and report one task, one annotation and no predictions.
- name: stage
request:
files:
json: tests/test_suites/samples/previous_export.json
headers:
content-type: multipart/form-data
method: POST
url: '{django_live_url}/api/projects/{project_pk}/import'
response:
status_code: 201
json:
task_count: 1
annotation_count: 1
prediction_count: 0
could_be_tasks_list: false
found_formats: {'.json': 1}
data_columns: ['image']
17 changes: 17 additions & 0 deletions label_studio/tests/shared_stages.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,23 @@ stages:
method: POST
response:
status_code: 302
# Shared stage: log in by POSTing email/password to /user/login; a 302
# redirect is the expected response.
- id: login
name: Login
request:
url: "{django_live_url}/user/login"
data:
email: [email protected]
password: 12345678
method: POST
response:
status_code: 302
# Shared stage: log out via GET /logout; a 302 redirect is the expected response.
- id: logout
name: logout
request:
url: "{django_live_url}/logout"
method: GET
response:
status_code: 302
- id: create_project
name: create_project
request:
Expand Down
43 changes: 43 additions & 0 deletions label_studio/tests/test_suites/samples/previous_export.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
{
"id": 1234,
"annotations": [
{
"id": 5678,
"completed_by": {
"id": 42,
"email": "[email protected]",
"first_name": "",
"last_name": ""
},
"result": [
{
"id": "aBcDeF",
"type": "choices",
"value": {
"choices": [
"Cat"
]
},
"to_name": "image",
"from_name": "label"
}
],
"was_cancelled": false,
"ground_truth": false,
"created_at": "2021-07-06T06:06:25.927570Z",
"updated_at": "2021-07-06T06:44:08.446371Z",
"lead_time": 346193.271,
"prediction": {},
"result_count": 0,
"task": 1234
}
],
"predictions": [],
"data": {
"image": "https://pytest.labelstud.io/mypets.jpg"
},
"meta": {},
"created_at": "2021-05-28T18:56:52.086267Z",
"updated_at": "2021-07-06T06:44:08.420891Z",
"project": 49
}

0 comments on commit 26aed24

Please sign in to comment.