From 148e6735edd1e558e0312287c10e8618d71fd96c Mon Sep 17 00:00:00 2001 From: jkgoodrich <33063077+jkgoodrich@users.noreply.github.com> Date: Thu, 28 Nov 2024 11:44:48 -0500 Subject: [PATCH 1/2] Add functions to modify the LOFTEE END_TRUNC flag --- gnomad/utils/vep.py | 83 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) diff --git a/gnomad/utils/vep.py b/gnomad/utils/vep.py index 5d45924a4..15474dfed 100644 --- a/gnomad/utils/vep.py +++ b/gnomad/utils/vep.py @@ -1003,3 +1003,86 @@ def explode_by_vep_annotation( t = t.explode_rows(t[vep_annotation]) return t + + +def get_loftee_end_trunc_filter_expr( + csq_expr: hl.expr.StructExpression, + gerp_dist_cutoff: float = 0.0, +) -> hl.expr.BooleanExpression: + """ + Get the expression for LOFTEE's END_TRUNC filter based on the GERP distance cutoff. + + The end truncation filter is based on the GERP distance cutoff (`gerp_dist_cutoff`) + and the 'GERP_DIST' and '50_BP_RULE' annotations in the LOFTEE annotations. + + True is returned if the GERP distance is less than the cutoff and the '50_BP_RULE' + annotation is not 'PASS'. + + :param csq_expr: StructExpression containing the LOFTEE annotation 'lof_info', with + 'GERP_DIST' and '50_BP_RULE' info. + :param gerp_dist_cutoff: GERP distance cutoff for end truncation. Default is 0.0. + :return: BooleanExpression for end truncation annotation. + """ + lof_info_expr = hl.dict( + csq_expr.lof_info.split(",") + .map(lambda x: x.split(":")) + .map(lambda x: (x[0], hl.or_missing(x.length() > 1, x[1]))) + ) + + end_trunc_expr = hl.or_else( + hl.float64(lof_info_expr.get("GERP_DIST", "0")) < gerp_dist_cutoff, False + ) & hl.or_else(lof_info_expr.get("50_BP_RULE", "") != "PASS", False) + + return end_trunc_expr + + +def update_loftee_end_trunc_filter( + csq_expr: Union[hl.expr.StructExpression, hl.expr.ArrayExpression], + gerp_dist_cutoff: float = 0.0, +) -> hl.expr.StructExpression: + """ + Update the LOFTEE end truncation filter in the input Struct or Array of Structs. + + The LOFTEE end truncation filter is updated based on the GERP distance cutoff + (`gerp_dist_cutoff`) using `get_loftee_end_trunc_filter_expr`. + + The 'lof_filter' field in the input Struct or Array of Structs is updated to include + 'END_TRUNC' if the end truncation filter is met, and 'END_TRUNC' is removed if the + end truncation filter is not met. + + Then the 'lof' field in the input Struct or Array of Structs is updated to 'HC' if + the new 'lof_filter' is missing, and 'LC' if it's not missing. + + :param csq_expr: Struct or Array of Structs containing the LOFTEE annotations. + :param gerp_dist_cutoff: GERP distance cutoff for end truncation. Default is 0.0. + :return: Struct or Array of Structs with updated LOFTEE end truncation filter + annotation. + """ + def _update_csq_struct(csq_expr: hl.expr.StructExpression): + """ + Update the LOFTEE end truncation filter in the input Struct. + + :param csq_expr: Struct containing the LOFTEE annotations. + :return: Consequence Struct with updated LOFTEE annotations. + """ + end_trunc_expr = get_loftee_end_trunc_filter_expr(csq_expr, gerp_dist_cutoff) + filter_expr = hl.or_else( + hl.set(csq_expr.lof_filter.split(",")), hl.empty_set(hl.tstr) + ) + filter_expr = hl.if_else( + end_trunc_expr, + filter_expr.add("END_TRUNC"), + filter_expr.remove("END_TRUNC"), + ) + filter_expr = hl.or_missing(filter_expr.length() > 0, hl.delimit(filter_expr)) + + lof_expr = hl.or_missing( + hl.is_defined(csq_expr.lof), hl.if_else(hl.is_missing(filter_expr), "HC", "LC") + ) + + return hl.struct(lof_filter=filter_expr, lof=lof_expr) + + if isinstance(csq_expr, hl.expr.StructExpression): + return csq_expr.annotate(**_update_csq_struct(csq_expr)) + else: + return csq_expr.map(lambda x: x.annotate(**_update_csq_struct(x))) From 52fd26b23ad9a8444eb9c97b4775037ef537bece Mon Sep 17 00:00:00 2001 From: jkgoodrich <33063077+jkgoodrich@users.noreply.github.com> Date: Mon, 2 Dec 2024 09:11:05 -0700 Subject: [PATCH 2/2] format --- gnomad/utils/vep.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/gnomad/utils/vep.py b/gnomad/utils/vep.py index 15474dfed..7534119c2 100644 --- a/gnomad/utils/vep.py +++ b/gnomad/utils/vep.py @@ -1058,6 +1058,7 @@ def update_loftee_end_trunc_filter( :return: Struct or Array of Structs with updated LOFTEE end truncation filter annotation. """ + def _update_csq_struct(csq_expr: hl.expr.StructExpression): """ Update the LOFTEE end truncation filter in the input Struct. @@ -1077,7 +1078,8 @@ def _update_csq_struct(csq_expr: hl.expr.StructExpression): filter_expr = hl.or_missing(filter_expr.length() > 0, hl.delimit(filter_expr)) lof_expr = hl.or_missing( - hl.is_defined(csq_expr.lof), hl.if_else(hl.is_missing(filter_expr), "HC", "LC") + hl.is_defined(csq_expr.lof), + hl.if_else(hl.is_missing(filter_expr), "HC", "LC"), ) return hl.struct(lof_filter=filter_expr, lof=lof_expr)