Skip to content

Commit

Permalink
feat: xblock skill verification event (#165)
Browse files Browse the repository at this point in the history
Adds data class and event to send skill verification data for an XBlock.

Updates avro serialization & de serialization just enough to support array types.

Description: The idea is that users will verify the tags/skills associated to an XBlock. We want to send this data via openedx-event signals to course-discovery and update the relevant tables.
  • Loading branch information
navinkarkera authored Feb 17, 2023
1 parent 900c92b commit 26a8dd5
Show file tree
Hide file tree
Showing 11 changed files with 165 additions and 10 deletions.
8 changes: 8 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,14 @@ Change Log
Unreleased
----------

[5.1.0] - 2023-02-07
---------------------
Added
~~~~~~~
* Added support for array type.
* Added new XBLOCK_SKILL_VERIFIED event.
* Added XBlockSkillVerificationData classes.

[5.0.0] - 2023-02-03
--------------------
Changed
Expand Down
2 changes: 1 addition & 1 deletion openedx_events/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@
more information about the project.
"""

__version__ = "5.0.0"
__version__ = "5.1.0"
17 changes: 16 additions & 1 deletion openedx_events/event_bus/avro/deserializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,13 @@
Deserialize Avro record dictionaries to events that can be sent with OpenEdxPublicSignals.
"""
import json
from typing import get_args, get_origin

import attr

from .custom_serializers import DEFAULT_CUSTOM_SERIALIZERS
from .schema import schema_from_signal
from .types import PYTHON_TYPE_TO_AVRO_MAPPING
from .types import PYTHON_TYPE_TO_AVRO_MAPPING, SIMPLE_PYTHON_TYPE_TO_AVRO_MAPPING

# Dict of class to deserialize methods (e.g. datetime => DatetimeAvroSerializer.deserialize)
DEFAULT_DESERIALIZERS = {serializer.cls: serializer.deserialize for serializer in DEFAULT_CUSTOM_SERIALIZERS}
Expand All @@ -29,11 +30,25 @@ def _deserialized_avro_record_dict_to_object(data: dict, data_type, deserializer
"""
param_deserializers = deserializers or {}
all_deserializers = {**DEFAULT_DESERIALIZERS, **param_deserializers}
# get generic type of data_type
# if data_type == List[int], data_type_origin = list
data_type_origin = get_origin(data_type)

if deserializer := all_deserializers.get(data_type, None):
return deserializer(data)
elif data_type in PYTHON_TYPE_TO_AVRO_MAPPING:
return data
elif data_type_origin == list:
# returns types of list contents
# if data_type == List[int], arg_data_type = (int,)
arg_data_type = get_args(data_type)
if not arg_data_type:
raise TypeError(
"List without annotation type is not supported. The argument should be a type, for eg., List[int]"
)
# check whether list items type is in basic types.
if arg_data_type[0] in SIMPLE_PYTHON_TYPE_TO_AVRO_MAPPING:
return data
elif hasattr(data_type, "__attrs_attrs__"):
transformed = {}
for attribute in data_type.__attrs_attrs__:
Expand Down
26 changes: 22 additions & 4 deletions openedx_events/event_bus/avro/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@
"""


from typing import get_args, get_origin

from .custom_serializers import DEFAULT_CUSTOM_SERIALIZERS
from .types import PYTHON_TYPE_TO_AVRO_MAPPING
from .types import PYTHON_TYPE_TO_AVRO_MAPPING, SIMPLE_PYTHON_TYPE_TO_AVRO_MAPPING

DEFAULT_FIELD_TYPES = {serializer.cls: serializer.field_type for serializer in DEFAULT_CUSTOM_SERIALIZERS}

Expand Down Expand Up @@ -51,19 +53,35 @@ def _create_avro_field_definition(data_key, data_type, previously_seen_types,
"""
field = {"name": data_key}
all_field_type_overrides = custom_type_to_avro_type or {}
# get generic type of data_type
# if data_type == List[int], data_type_origin = list
data_type_origin = get_origin(data_type)

# Case 1: data_type has a predetermined avro field representation
if field_type := all_field_type_overrides.get(data_type, None):
field["type"] = field_type
# Case 2: data_type is a simple type that can be converted directly to an Avro type
elif data_type in PYTHON_TYPE_TO_AVRO_MAPPING:
if PYTHON_TYPE_TO_AVRO_MAPPING[data_type] in ["record", "array"]:
# Can implement if needed, but for now it doesn't seem to be necessary.
# pylint: disable-next=broad-exception-raised
raise Exception("Unable to generate Avro schema for dict or array fields")
raise Exception("Unable to generate Avro schema for dict or array fields without annotation types.")
avro_type = PYTHON_TYPE_TO_AVRO_MAPPING[data_type]
field["type"] = avro_type

elif data_type_origin == list:
# returns types of list contents
# if data_type == List[int], arg_data_type = (int,)
arg_data_type = get_args(data_type)
if not arg_data_type:
raise TypeError(
"List without annotation type is not supported. The argument should be a type, for eg., List[int]"
)
avro_type = SIMPLE_PYTHON_TYPE_TO_AVRO_MAPPING.get(arg_data_type[0])
if avro_type is None:
raise TypeError(
"Only following types are supported for list arguments:"
f" {set(SIMPLE_PYTHON_TYPE_TO_AVRO_MAPPING.keys())}"
)
field["type"] = {"type": PYTHON_TYPE_TO_AVRO_MAPPING[data_type_origin], "items": avro_type}
# Case 3: data_type is an attrs class
elif hasattr(data_type, "__attrs_attrs__"):
# Inner Attrs Class
Expand Down
3 changes: 2 additions & 1 deletion openedx_events/event_bus/avro/serializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,8 @@ def _serialize_non_attrs_values(inst, field, value): # pylint: disable=unused-a

for extended_class, serializer in all_serializers.items():
if field:
if issubclass(field.type, extended_class):
# Make sure that field.type is a class first.
if isinstance(field.type, type) and issubclass(field.type, extended_class):
return serializer(value)
if issubclass(type(value), extended_class):
return serializer(value)
Expand Down
2 changes: 2 additions & 0 deletions openedx_events/event_bus/avro/tests/test_avro.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Test interplay of the various Avro helper classes"""
from datetime import datetime
from typing import List
from unittest import TestCase

from opaque_keys.edx.keys import CourseKey, UsageKey
Expand Down Expand Up @@ -51,6 +52,7 @@ def generate_test_event_data_for_data_type(data_type):
UsageKey: UsageKey.from_string(
"block-v1:edx+DemoX+Demo_course+type@video+block@UaEBjyMjcLW65gaTXggB93WmvoxGAJa0JeHRrDThk",
),
List[int]: [1, 2, 3],
datetime: datetime.now(),
}
for attribute in data_type.__attrs_attrs__:
Expand Down
58 changes: 58 additions & 0 deletions openedx_events/event_bus/avro/tests/test_deserializer.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Tests for avro.deserializer"""
import json
from datetime import datetime
from typing import List
from unittest import TestCase

from opaque_keys.edx.keys import CourseKey, UsageKey
Expand Down Expand Up @@ -175,3 +176,60 @@ def test_deserialization_of_nested_optional_fields(self):
nested_field = data_dict["data"].field_0
self.assertIsInstance(nested_field, SimpleAttrsWithDefaults)
self.assertEqual(nested_field, SimpleAttrsWithDefaults())

def test_deserialization_of_list_with_annotation(self):
"""
Check that deserialization works as expected when list data is annotated.
"""
LIST_SIGNAL = create_simple_signal({"list_input": List[int]})
initial_dict = {"list_input": [1, 3]}
deserializer = AvroSignalDeserializer(LIST_SIGNAL)
event_data = deserializer.from_dict(initial_dict)
expected_event_data = [1, 3]
test_data = event_data["list_input"]
self.assertIsInstance(test_data, list)
self.assertEqual(test_data, expected_event_data)

def test_deserialization_of_list_without_annotation(self):
"""
Check that deserialization raises error when list data is not annotated.
"""
# create dummy signal to bypass schema check while initializing deserializer
# This allows us to test whether correct exceptions are raised while deserializing data
SIGNAL = create_simple_signal({"list_input": List[int]})
LIST_SIGNAL = create_simple_signal({"list_input": List})
initial_dict = {"list_input": [1, 3]}
deserializer = AvroSignalDeserializer(SIGNAL)
# Update signal with incomplete type info
deserializer.signal = LIST_SIGNAL
with self.assertRaises(TypeError):
deserializer.from_dict(initial_dict)

def test_deserialization_of_nested_list_fails(self):
"""
Check that deserialization raises error when nested list data is passed.
"""
# create dummy signal to bypass schema check while initializing deserializer
# This allows us to test whether correct exceptions are raised while deserializing data
SIGNAL = create_simple_signal({"list_input": List[int]})
LIST_SIGNAL = create_simple_signal({"list_input": List[List[int]]})
initial_dict = {"list_input": [[1, 3], [4, 5]]}
deserializer = AvroSignalDeserializer(SIGNAL)
# Update signal with incomplete type info
deserializer.signal = LIST_SIGNAL
with self.assertRaises(TypeError):
deserializer.from_dict(initial_dict)

def test_deserialization_of_nested_list_with_complex_types_fails(self):
SIGNAL = create_simple_signal({"list_input": List[list]})
with self.assertRaises(TypeError):
AvroSignalDeserializer(SIGNAL)
initial_dict = {"list_input": [[1, 3], [4, 5]]}
# create dummy signal to bypass schema check while initializing deserializer
# This allows us to test whether correct exceptions are raised while deserializing data
DUMMY_SIGNAL = create_simple_signal({"list_input": List[int]})
deserializer = AvroSignalDeserializer(DUMMY_SIGNAL)
# Update signal with incorrect type info
deserializer.signal = SIGNAL
with self.assertRaises(TypeError):
deserializer.from_dict(initial_dict)
21 changes: 20 additions & 1 deletion openedx_events/event_bus/avro/tests/test_schema.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Tests for event_bus.avro.schema module
"""
from typing import List
from unittest import TestCase

from openedx_events.event_bus.avro.schema import schema_from_signal
Expand Down Expand Up @@ -235,11 +236,29 @@ class UnextendedClass:
with self.assertRaises(TypeError):
schema_from_signal(SIGNAL)

def test_throw_exception_to_list_or_dict_types(self):
def test_throw_exception_to_list_or_dict_types_without_annotation(self):
LIST_SIGNAL = create_simple_signal({"list_input": list})
DICT_SIGNAL = create_simple_signal({"list_input": dict})
LIST_WITHOUT_ANNOTATION_SIGNAL = create_simple_signal({"list_input": List})
with self.assertRaises(Exception):
schema_from_signal(LIST_SIGNAL)

with self.assertRaises(Exception):
schema_from_signal(DICT_SIGNAL)

with self.assertRaises(TypeError):
schema_from_signal(LIST_WITHOUT_ANNOTATION_SIGNAL)

def test_list_with_annotation_works(self):
LIST_SIGNAL = create_simple_signal({"list_input": List[int]})
expected_dict = {
'name': 'CloudEvent',
'type': 'record',
'doc': 'Avro Event Format for CloudEvents created with openedx_events/schema',
'fields': [{
'name': 'list_input',
'type': {'type': 'array', 'items': 'long'},
}],
}
schema = schema_from_signal(LIST_SIGNAL)
self.assertDictEqual(schema, expected_dict)
7 changes: 5 additions & 2 deletions openedx_events/event_bus/avro/types.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
"""A mapping of python types to the Avro type that we want to use make valid avro schema."""
PYTHON_TYPE_TO_AVRO_MAPPING = {
None: "null",
SIMPLE_PYTHON_TYPE_TO_AVRO_MAPPING = {
bool: "boolean",
int: "long",
float: "double",
bytes: "bytes",
str: "string",
}
PYTHON_TYPE_TO_AVRO_MAPPING = {
**SIMPLE_PYTHON_TYPE_TO_AVRO_MAPPING,
None: "null",
dict: "record",
list: "array",
}
18 changes: 18 additions & 0 deletions openedx_events/learning/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,3 +217,21 @@ class PersistentCourseGradeData:
percent_grade = attr.ib(type=float)
letter_grade = attr.ib(type=str)
passed_timestamp = attr.ib(type=datetime)


@attr.s(frozen=True)
class XBlockSkillVerificationData:
"""
Data needed to update verification count of tags/skills for an XBlock.
User feedback on whether tags/skills related to an XBlock are valid.
Arguments:
usage_key (UsageKey): identifier of the XBlock object.
verified_skills (List[int]): list of verified skill ids.
ignored_skills (List[int]): list of ignored skill ids.
"""

usage_key = attr.ib(type=UsageKey)
verified_skills = attr.ib(type=List[int], factory=list)
ignored_skills = attr.ib(type=List[int], factory=list)
13 changes: 13 additions & 0 deletions openedx_events/learning/signals.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
CourseEnrollmentData,
PersistentCourseGradeData,
UserData,
XBlockSkillVerificationData,
)
from openedx_events.tooling import OpenEdxPublicSignal

Expand Down Expand Up @@ -148,3 +149,15 @@
"grade": PersistentCourseGradeData,
}
)


# .. event_type: org.openedx.learning.xblock.skill.verified.v1
# .. event_name: XBLOCK_SKILL_VERIFIED
# .. event_description: Fired when an XBlock skill is verified.

This comment has been minimized.

Copy link
@robrap

robrap May 18, 2023

Contributor

@navinkarkera: It would be great if here and/or with XBlockSkillVerificationData there were a link to documentation to explain what skill verification is, and what verified and ignored skill ids are.

# .. event_data: XBlockSkillVerificationData
XBLOCK_SKILL_VERIFIED = OpenEdxPublicSignal(
event_type="org.openedx.content_authoring.xblock.skill.verified.v1",
data={
"xblock_info": XBlockSkillVerificationData,
}
)

0 comments on commit 26a8dd5

Please sign in to comment.