-
Notifications
You must be signed in to change notification settings - Fork 285
/
bh_multiple_tests.sqlx
89 lines (81 loc) · 3.1 KB
/
bh_multiple_tests.sqlx
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
config { hasOutput: true }
/*
* Copyright 2021 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-- Adjust p values using the Benjamini-Hochberg multipletests method, additional details in doi:10.1098/rsta.2009.0127
-- the implementation can be compared with the python function 'statsmodels.stats.multitest.multipletests' (method='fdr_bh')
-- @param STRING pvalue_table_name : the name of the table with the p values that need to be adjusted
-- @param STRING pvalue_column_name : the name of the column with p values.
-- @param INT Nrows : Number of tests (equal to number of rows of the input table)
CREATE OR REPLACE PROCEDURE ${self()}(pvalue_table_name STRING, pvalue_column_name STRING, n_rows INT64, temp_table_name STRING )
BEGIN
EXECUTE IMMEDIATE format("""
CREATE TEMP TABLE %s AS
WITH padjusted_data AS (
WITH ranked_data AS (
SELECT *, ( DENSE_RANK() OVER( ORDER BY %s) ) AS jrank
FROM %s
)
SELECT *,
MIN( %d * %s / jrank )
OVER (
ORDER BY jrank DESC
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
) AS p_adj
FROM ranked_data
)
SELECT * EXCEPT (p_adj, jrank), IF( p_adj > 1.0 , 1.0, p_adj) AS p_adj
FROM padjusted_data
ORDER BY jrank""", temp_table_name, pvalue_column_name, pvalue_table_name, n_rows, pvalue_column_name );
EXECUTE IMMEDIATE format("""SELECT * FROM %s""", temp_table_name);
END;
-- a unit test of bh_multiple_tests
BEGIN
CREATE TEMP TABLE Pvalues AS
SELECT 0.001 as pval
UNION ALL SELECT 0.008
UNION ALL SELECT 0.039
UNION ALL SELECT 0.041
UNION ALL SELECT 0.042
UNION ALL SELECT 0.06
UNION ALL SELECT 0.074
UNION ALL SELECT 0.205;
CALL ${self()}('Pvalues','pval',8, 'bh_multiple_tests_results');
# Table Output
# pval p_adj
# 0.001 0.008
# 0.008 0.032
# 0.039 0.06720000000000001
# 0.041 0.06720000000000001
# 0.042 0.06720000000000001
# 0.06 0.08
# 0.074 0.08457142857142856
# 0.205 0.205
ASSERT(
SELECT COUNTIF(
(pval = 0.001 AND p_adj = 0.008)
OR (pval = 0.008 AND p_adj = 0.032)
OR (pval = 0.039 AND p_adj = 0.06720000000000001)
OR (pval = 0.041 AND p_adj = 0.06720000000000001)
OR (pval = 0.042 AND p_adj = 0.06720000000000001)
OR (pval = 0.06 AND p_adj = 0.08)
OR (pval = 0.074 AND p_adj = 0.08457142857142856)
OR (pval = 0.205 AND p_adj = 0.205)
)
FROM bh_multiple_tests_results
) = 8;
DROP TABLE Pvalues;
DROP TABLE bh_multiple_tests_results;
END;