Skip to content

Commit 55a1a9b

Browse files
authored
Create mitxonline problem engagement marts (#953)
* create mart for mitxonline problem engagements * add problem summary mart to count for showanswer * update description
1 parent 71ab353 commit 55a1a9b

6 files changed

+288
-0
lines changed

src/ol_dbt/models/intermediate/mitxonline/_int_mitxonline__models.yml

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1469,6 +1469,11 @@ models:
14691469
description: number, The number of times the user attempted to answer this problem
14701470
tests:
14711471
- not_null
1472+
- name: useractivity_problem_student_answers
1473+
description: json, student answers to this problem in problem_id and internal
1474+
answer pair. For multiple questions, it lists every question and answer.
1475+
tests:
1476+
- not_null
14721477
- name: useractivity_problem_success
14731478
description: str, It's either 'correct' or 'incorrect'
14741479
tests:
@@ -1543,6 +1548,41 @@ models:
15431548
column_list: ["user_username", "courserun_readable_id", "useractivity_problem_id",
15441549
"useractivity_timestamp"]
15451550

1551+
- name: int__mitxonline__user_courseactivity_showanswer
1552+
description: MITx Online learners show answer events within a course
1553+
columns:
1554+
- name: user_username
1555+
description: str, username of the open edX user
1556+
tests:
1557+
- not_null
1558+
- name: openedx_user_id
1559+
description: int, open edX user ID extracted from context field. This id doesn't
1560+
always match with auth_user in open edX table, there can be multiple openedx_user_ids
1561+
for the same user_username. For those cases, use openedx_user_id from auth_user
1562+
open edX table.
1563+
tests:
1564+
- not_null
1565+
- name: courserun_readable_id
1566+
description: str, Open edX Course ID formatted as course-v1:{org}+{course code}+{run_tag}
1567+
tests:
1568+
- not_null
1569+
- name: useractivity_path
1570+
description: str, relative url path of the page where the show answer event
1571+
occurred.
1572+
- name: useractivity_problem_id
1573+
description: str, Unique ID for this problem in a course, formatted as block-v1:{org}+{course
1574+
ID}+type@problem+block@{hash code}.
1575+
tests:
1576+
- not_null
1577+
- name: useractivity_timestamp
1578+
description: timestamp, time for this show answer event
1579+
tests:
1580+
- not_null
1581+
tests:
1582+
- dbt_expectations.expect_compound_columns_to_be_unique:
1583+
column_list: ["user_username", "courserun_readable_id", "useractivity_problem_id",
1584+
"useractivity_timestamp"]
1585+
15461586
- name: int__mitxonline__user_courseactivity_discussion
15471587
description: MITx Online learners discussion forum interaction within a course
15481588
columns:

src/ol_dbt/models/intermediate/mitxonline/int__mitxonline__user_courseactivity_problemcheck.sql

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ select
1313
, useractivity_timestamp
1414
, json_query(useractivity_context_object, 'lax $.module.display_name' omit quotes) as useractivity_problem_name
1515
, json_query(useractivity_event_object, 'lax $.problem_id' omit quotes) as useractivity_problem_id
16+
, json_query(useractivity_event_object, 'lax $.answers' omit quotes) as useractivity_problem_student_answers
1617
, json_query(useractivity_event_object, 'lax $.attempts' omit quotes) as useractivity_problem_attempts
1718
, json_query(useractivity_event_object, 'lax $.success' omit quotes) as useractivity_problem_success
1819
, json_query(useractivity_event_object, 'lax $.grade' omit quotes) as useractivity_problem_current_grade
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
{{ config(materialized='view') }}

-- Show-answer events of MITx Online learners: one output row per tracking-log
-- row whose event type is 'showanswer' and that is tied to a course run.
with tracking_events as (
    -- tracking-log rows without a course run id are excluded up front
    select * from {{ ref('stg__mitxonline__openedx__tracking_logs__user_activity') }}
    where courserun_readable_id is not null
)

, showanswer_events as (
    select *
    from tracking_events
    where useractivity_event_type = 'showanswer'
)

select
    user_username
    , courserun_readable_id
    , openedx_user_id
    , useractivity_path
    , useractivity_timestamp
    -- the problem id is read from the event payload as an unquoted string
    , json_query(useractivity_event_object, 'lax $.problem_id' omit quotes) as useractivity_problem_id
from showanswer_events

src/ol_dbt/models/marts/mitxonline/_marts__mitxonline__models.yml

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -250,3 +250,120 @@ models:
250250
description: timestamp, time of this video event
251251
tests:
252252
- not_null
253+
254+
- name: marts__mitxonline_problem_submissions
255+
description: MITx Online learners problem submissions
256+
columns:
257+
- name: user_username
258+
description: str, username of the open edX user
259+
tests:
260+
- not_null
261+
- name: courserun_readable_id
262+
description: str, Open edX Course ID formatted as course-v1:{org}+{course code}+{run_tag}
263+
tests:
264+
- not_null
265+
- name: problem_id
266+
description: str, Unique ID for this problem in a course. It's formatted as block-v1:{org}+{course
267+
ID}+type@problem+block@{hash code}
268+
tests:
269+
- not_null
270+
- name: problem_name
271+
description: str, display name of this problem in a course
272+
tests:
273+
- not_null
274+
- name: num_attempts
275+
description: number, The number of times the user had attempted at the time of
276+
this submission.
277+
tests:
278+
- not_null
279+
- name: student_answers
280+
description: json, student answers to this problem in a name/value (problem_id/answer)
281+
pair. For multiple questions, it lists every question and answer.
282+
tests:
283+
- not_null
284+
- name: problem_success
285+
description: str, It's either 'correct' or 'incorrect'
286+
tests:
287+
- not_null
288+
- name: problem_grade
289+
description: number, user grade for this problem at the time of this submission.
290+
It can range from 0 to the max possible grade for this problem. e.g. if problem_max_grade
291+
is 12, problem_grade can be anywhere between 0 to 12.
292+
tests:
293+
- not_null
294+
- name: problem_max_grade
295+
description: number, Maximum possible grade value for this problem.
296+
tests:
297+
- not_null
298+
- name: problem_submission_timestamp
299+
description: timestamp, time for this problem submission
300+
tests:
301+
- not_null
302+
- name: is_most_recent_attempt
303+
description: boolean, indicating if this submission is the most recent attempt.
304+
Useful to filter out any previous answers
305+
tests:
306+
- not_null
307+
- name: user_email
308+
description: str, user email on MITx Online
309+
- name: user_full_name
310+
description: str, user full name on MITx Online
311+
- name: course_number
312+
description: str, unique string for the course. It can contain letters, numbers,
313+
or periods. e.g. 18.03.1x
314+
tests:
315+
- not_null
316+
- name: courserun_title
317+
description: str, title of the course run
318+
- name: courserun_start_on
319+
description: timestamp, datetime on when the course begins
320+
- name: courserun_end_on
321+
description: timestamp, datetime on when the course ends
322+
tests:
323+
- dbt_expectations.expect_compound_columns_to_be_unique:
324+
column_list: ["user_username", "courserun_readable_id", "problem_id", "problem_submission_timestamp"]
325+
326+
- name: marts__mitxonline_problem_summary
327+
description: MITx Online learners problem performance summary with a particular
328+
problem
329+
columns:
330+
- name: user_username
331+
description: str, username of the open edX user
332+
tests:
333+
- not_null
334+
- name: courserun_readable_id
335+
description: str, Open edX Course ID formatted as course-v1:{org}+{course code}+{run_tag}
336+
tests:
337+
- not_null
338+
- name: problem_id
339+
description: str, Unique ID for this problem in a course. It's formatted as block-v1:{org}+{course
340+
ID}+type@problem+block@{hash code}
341+
tests:
342+
- not_null
343+
- name: num_showanswer
344+
description: number, The number of times the user clicked on showanswer for this
345+
problem.
346+
- name: num_attempts
347+
description: number, The number of times the user had attempted this problem.
348+
Null if user has never attempted this problem
349+
- name: problem_success
350+
description: str, indicating if user's most recent attempt to this problem is
351+
correct or incorrect. Null if user has never attempted this problem
352+
- name: user_email
353+
description: str, user email on MITx Online
354+
- name: user_full_name
355+
description: str, user full name on MITx Online
356+
- name: course_number
357+
description: str, unique string for the course. It can contain letters, numbers,
358+
or periods. e.g. 18.03.1x
359+
tests:
360+
- not_null
361+
- name: courserun_title
362+
description: str, title of the course run
363+
- name: courserun_start_on
364+
description: timestamp, datetime on when the course begins
365+
- name: courserun_end_on
366+
description: timestamp, datetime on when the course ends
367+
tests:
368+
- dbt_expectations.expect_compound_columns_to_be_unique:
369+
column_list: ["user_username", "courserun_readable_id", "problem_id"]
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
-- MITx Online problem submissions: one row per row of
-- int__mitxonline__user_courseactivity_problemcheck that has a matching course run,
-- enriched with user and course run attributes and flagged when it is the
-- learner's latest attempt at that problem.
with problem_response as (
    select
        *
        -- Rank each learner's submissions per problem, highest attempt number first.
        -- useractivity_problem_attempts is extracted upstream with json_query, which
        -- yields varchar; sorting the raw string is lexicographic ('9' > '10'), so the
        -- value is converted to a number first. The timestamp breaks any ties (and
        -- orders rows whose attempt count cannot be parsed) deterministically.
        , row_number() over (
            partition by courserun_readable_id, user_username, useractivity_problem_id
            order by
                try_cast(useractivity_problem_attempts as bigint) desc nulls last
                , useractivity_timestamp desc
        ) as most_recent_num
    from {{ ref('int__mitxonline__user_courseactivity_problemcheck') }}
)

, course_runs as (
    select * from {{ ref('int__mitxonline__course_runs') }}
)

, users as (
    select * from {{ ref('int__mitxonline__users') }}
)

select
    problem_response.user_username
    , problem_response.courserun_readable_id
    , problem_response.useractivity_problem_id as problem_id
    , problem_response.useractivity_problem_name as problem_name
    , problem_response.useractivity_problem_attempts as num_attempts
    , problem_response.useractivity_problem_student_answers as student_answers
    , problem_response.useractivity_problem_success as problem_success
    , problem_response.useractivity_problem_current_grade as problem_grade
    , problem_response.useractivity_problem_max_grade as problem_max_grade
    , problem_response.useractivity_timestamp as problem_submission_timestamp
    , users.user_full_name
    , users.user_email
    , course_runs.courserun_title
    , course_runs.course_number
    , course_runs.courserun_start_on
    , course_runs.courserun_end_on
    -- row_number() is never null, so the comparison itself is the boolean flag
    , (problem_response.most_recent_num = 1) as is_most_recent_attempt
from problem_response
-- inner join: submissions without a matching course run are dropped
inner join course_runs on problem_response.courserun_readable_id = course_runs.courserun_readable_id
-- left join: submissions are kept even when no user record matches (name/email are null)
left join users on problem_response.user_username = users.user_username
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
-- MITx Online per-learner, per-problem summary: number of show-answer clicks
-- combined with the attempt count and outcome of the learner's latest submission.
with showanswers as (
    select * from {{ ref('int__mitxonline__user_courseactivity_showanswer') }}
)

-- show-answer clicks counted per learner, course run and problem
, showanswers_stats as (
    select
        user_username
        , courserun_readable_id
        , useractivity_problem_id
        , count(*) as num_showanswer
    from showanswers
    group by
        user_username
        , courserun_readable_id
        , useractivity_problem_id
)

, problem_attempts as (
    select
        *
        -- Rank each learner's submissions per problem, highest attempt number first.
        -- useractivity_problem_attempts is extracted upstream with json_query, which
        -- yields varchar; sorting the raw string is lexicographic ('9' > '10'), so the
        -- value is converted to a number first. The timestamp breaks any ties (and
        -- orders rows whose attempt count cannot be parsed) deterministically.
        , row_number() over (
            partition by courserun_readable_id, user_username, useractivity_problem_id
            order by
                try_cast(useractivity_problem_attempts as bigint) desc nulls last
                , useractivity_timestamp desc
        ) as row_num
    from {{ ref('int__mitxonline__user_courseactivity_problemcheck') }}
)

-- keep only the top-ranked submission per learner and problem
, most_recent_attempts as (
    select *
    from problem_attempts
    where row_num = 1
)

, course_runs as (
    select * from {{ ref('int__mitxonline__course_runs') }}
)

, users as (
    select * from {{ ref('int__mitxonline__users') }}
)

-- Full outer join so a learner/problem pair appears when it has show-answer
-- events, submissions, or both. The key columns are coalesced because either
-- side may be missing; num_showanswer is null without show-answer events, and
-- num_attempts / problem_success are null without submissions.
, combined as (
    select
        showanswers_stats.num_showanswer
        , most_recent_attempts.useractivity_problem_attempts as num_attempts
        , most_recent_attempts.useractivity_problem_success as problem_success
        , coalesce(showanswers_stats.user_username, most_recent_attempts.user_username) as user_username
        , coalesce(showanswers_stats.courserun_readable_id, most_recent_attempts.courserun_readable_id)
            as courserun_readable_id
        , coalesce(showanswers_stats.useractivity_problem_id, most_recent_attempts.useractivity_problem_id)
            as useractivity_problem_id
    from showanswers_stats
    full outer join most_recent_attempts
        on
            showanswers_stats.user_username = most_recent_attempts.user_username
            and showanswers_stats.courserun_readable_id = most_recent_attempts.courserun_readable_id
            and showanswers_stats.useractivity_problem_id = most_recent_attempts.useractivity_problem_id
)

select
    combined.user_username
    , combined.courserun_readable_id
    , combined.useractivity_problem_id as problem_id
    , combined.num_showanswer
    , combined.num_attempts
    , combined.problem_success
    , users.user_full_name
    , users.user_email
    , course_runs.courserun_title
    , course_runs.course_number
    , course_runs.courserun_start_on
    , course_runs.courserun_end_on
from combined
-- inner join: rows without a matching course run are dropped
inner join course_runs on combined.courserun_readable_id = course_runs.courserun_readable_id
-- left join: rows are kept even when no user record matches (name/email are null)
left join users on combined.user_username = users.user_username

0 commit comments

Comments
 (0)