-
Notifications
You must be signed in to change notification settings - Fork 29
Add get_loftee_end_trunc_filter_expr
and update_loftee_end_trunc_filter
to patch the LOFTEE END_TRUNC filter
#740
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1003,3 +1003,88 @@ def explode_by_vep_annotation( | |
t = t.explode_rows(t[vep_annotation]) | ||
|
||
return t | ||
|
||
|
||
def get_loftee_end_trunc_filter_expr( | ||
csq_expr: hl.expr.StructExpression, | ||
gerp_dist_cutoff: float = 0.0, | ||
) -> hl.expr.BooleanExpression: | ||
""" | ||
Get the expression for LOFTEE's END_TRUNC filter based on the GERP distance cutoff. | ||
|
||
The end truncation filter is based on the GERP distance cutoff (`gerp_dist_cutoff`) | ||
and the 'GERP_DIST' and '50_BP_RULE' annotations in the LOFTEE annotations. | ||
|
||
True is returned if the GERP distance is less than the cutoff and the '50_BP_RULE' | ||
annotation is not 'PASS'. | ||
|
||
:param csq_expr: StructExpression containing the LOFTEE annotation 'lof_info', with | ||
'GERP_DIST' and '50_BP_RULE' info. | ||
:param gerp_dist_cutoff: GERP distance cutoff for end truncation. Default is 0.0. | ||
:return: BooleanExpression for end truncation annotation. | ||
""" | ||
lof_info_expr = hl.dict( | ||
csq_expr.lof_info.split(",") | ||
.map(lambda x: x.split(":")) | ||
.map(lambda x: (x[0], hl.or_missing(x.length() > 1, x[1]))) | ||
) | ||
|
||
end_trunc_expr = hl.or_else( | ||
hl.float64(lof_info_expr.get("GERP_DIST", "0")) < gerp_dist_cutoff, False | ||
) & hl.or_else(lof_info_expr.get("50_BP_RULE", "") != "PASS", False) | ||
|
||
return end_trunc_expr | ||
|
||
|
||
def update_loftee_end_trunc_filter( | ||
csq_expr: Union[hl.expr.StructExpression, hl.expr.ArrayExpression], | ||
gerp_dist_cutoff: float = 0.0, | ||
) -> hl.expr.StructExpression: | ||
""" | ||
Update the LOFTEE end truncation filter in the input Struct or Array of Structs. | ||
|
||
The LOFTEE end truncation filter is updated based on the GERP distance cutoff | ||
(`gerp_dist_cutoff`) using `get_loftee_end_trunc_filter_expr`. | ||
|
||
The 'lof_filter' field in the input Struct or Array of Structs is updated to include | ||
'END_TRUNC' if the end truncation filter is met, and 'END_TRUNC' is removed if the | ||
end truncation filter is not met. | ||
|
||
Then the 'lof' field in the input Struct or Array of Structs is updated to 'HC' if | ||
the new 'lof_filter' is missing, and 'LC' if it's not missing. | ||
|
||
:param csq_expr: Struct or Array of Structs containing the LOFTEE annotations. | ||
:param gerp_dist_cutoff: GERP distance cutoff for end truncation. Default is 0.0. | ||
:return: Struct or Array of Structs with updated LOFTEE end truncation filter | ||
annotation. | ||
""" | ||
|
||
def _update_csq_struct(csq_expr: hl.expr.StructExpression): | ||
""" | ||
Update the LOFTEE end truncation filter in the input Struct. | ||
|
||
:param csq_expr: Struct containing the LOFTEE annotations. | ||
:return: Consequence Struct with updated LOFTEE annotations. | ||
""" | ||
end_trunc_expr = get_loftee_end_trunc_filter_expr(csq_expr, gerp_dist_cutoff) | ||
filter_expr = hl.or_else( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do you use There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The If |
||
hl.set(csq_expr.lof_filter.split(",")), hl.empty_set(hl.tstr) | ||
) | ||
filter_expr = hl.if_else( | ||
end_trunc_expr, | ||
filter_expr.add("END_TRUNC"), | ||
filter_expr.remove("END_TRUNC"), | ||
) | ||
filter_expr = hl.or_missing(filter_expr.length() > 0, hl.delimit(filter_expr)) | ||
|
||
lof_expr = hl.or_missing( | ||
hl.is_defined(csq_expr.lof), | ||
hl.if_else(hl.is_missing(filter_expr), "HC", "LC"), | ||
) | ||
|
||
return hl.struct(lof_filter=filter_expr, lof=lof_expr) | ||
|
||
if isinstance(csq_expr, hl.expr.StructExpression): | ||
return csq_expr.annotate(**_update_csq_struct(csq_expr)) | ||
else: | ||
return csq_expr.map(lambda x: x.annotate(**_update_csq_struct(x))) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for making this notebook! So the gerp_dist_cutoff is changing from -58 to 0 and some "HC" became "LC", is there a justification for this cutoff that we could document?
I summarized our discussion with Konrad last year to answer the questions in the emails and on the forum.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I only checked variants in BRCA2 gene, the last 2 LOF now have "END_TRUNC" filter and became "LC".
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yup, I'm hoping Julia can help me add to the documentation to justify the cutoff, and to also confirm that the patch is being done correctly.