-
Notifications
You must be signed in to change notification settings - Fork 1.6k
/
Copy pathpostgres-functions.sql
169 lines (145 loc) · 5.14 KB
/
postgres-functions.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
-- Functions TODO:
-- FROM table CROSS JOIN UNNEST(table.column) AS col -> ???? (see icustay-hours)
-- ???(column) -> PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY column) (not sure how to do median in BQ)
-- Set the search_path so all functions are generated on the mimiciv_derived schema
SET search_path TO mimiciv_derived, mimiciv_hosp, mimiciv_icu, mimiciv_ed;
CREATE OR REPLACE FUNCTION REGEXP_EXTRACT(str TEXT, pattern TEXT) RETURNS TEXT AS $$
BEGIN
RETURN substring(str from pattern);
END; $$
LANGUAGE PLPGSQL;
CREATE OR REPLACE FUNCTION REGEXP_CONTAINS(str TEXT, pattern TEXT) RETURNS BOOL AS $$
BEGIN
RETURN str ~ pattern;
END; $$
LANGUAGE PLPGSQL;
-- alias generate_series with generate_array
CREATE OR REPLACE FUNCTION GENERATE_ARRAY(i INTEGER, j INTEGER)
RETURNS setof INTEGER language sql as $$
SELECT GENERATE_SERIES(i, j)
$$;
-- datetime functions
CREATE OR REPLACE FUNCTION DATETIME(dt DATE) RETURNS TIMESTAMP(3) AS $$
BEGIN
RETURN CAST(dt AS TIMESTAMP(3));
END; $$
LANGUAGE PLPGSQL;
CREATE OR REPLACE FUNCTION DATETIME(year INTEGER, month INTEGER, day INTEGER, hour INTEGER, minute INTEGER, second INTEGER) RETURNS TIMESTAMP(3) AS $$
BEGIN
RETURN TO_TIMESTAMP(
TO_CHAR(year, '0000') || TO_CHAR(month, '00') || TO_CHAR(day, '00') || TO_CHAR(hour, '00') || TO_CHAR(minute, '00') || TO_CHAR(second, '00'),
'yyyymmddHH24MISS'
);
END; $$
LANGUAGE PLPGSQL;
-- note: in bigquery, `INTERVAL 1 YEAR` is a valid interval
-- but in postgres, it must be `INTERVAL '1' YEAR`
-- DATETIME_ADD(datetime, INTERVAL 'n' DATEPART) -> datetime + INTERVAL 'n' DATEPART
CREATE OR REPLACE FUNCTION DATETIME_ADD(datetime_val TIMESTAMP(3), intvl INTERVAL) RETURNS TIMESTAMP(3) AS $$
BEGIN
RETURN datetime_val + intvl;
END; $$
LANGUAGE PLPGSQL;
CREATE OR REPLACE FUNCTION DATE_ADD(dt DATE, intvl INTERVAL) RETURNS TIMESTAMP(3) AS $$
BEGIN
RETURN CAST(dt AS TIMESTAMP(3)) + intvl;
END; $$
LANGUAGE PLPGSQL;
-- DATETIME_SUB(datetime, INTERVAL 'n' DATEPART) -> datetime - INTERVAL 'n' DATEPART
CREATE OR REPLACE FUNCTION DATETIME_SUB(datetime_val TIMESTAMP(3), intvl INTERVAL) RETURNS TIMESTAMP(3) AS $$
BEGIN
RETURN datetime_val - intvl;
END; $$
LANGUAGE PLPGSQL;
CREATE OR REPLACE FUNCTION DATE_SUB(dt DATE, intvl INTERVAL) RETURNS TIMESTAMP(3) AS $$
BEGIN
RETURN CAST(dt AS TIMESTAMP(3)) - intvl;
END; $$
LANGUAGE PLPGSQL;
-- TODO:
-- DATETIME_TRUNC(datetime, PART) -> DATE_TRUNC('datepart', datetime)
-- below requires a regex to convert datepart from primitive to a string
-- i.e. encapsulate it in single quotes
CREATE OR REPLACE FUNCTION DATETIME_DIFF(endtime TIMESTAMP(3), starttime TIMESTAMP(3), datepart TEXT) RETURNS NUMERIC AS $$
BEGIN
RETURN
EXTRACT(EPOCH FROM endtime - starttime) /
CASE
WHEN datepart = 'SECOND' THEN 1.0
WHEN datepart = 'MINUTE' THEN 60.0
WHEN datepart = 'HOUR' THEN 3600.0
WHEN datepart = 'DAY' THEN 24*3600.0
WHEN datepart = 'YEAR' THEN 365.242*24*3600.0
ELSE NULL END;
END; $$
LANGUAGE PLPGSQL;
-- BigQuery has a custom data type, PART
-- It's difficult to replicate this in postgresql, which recognizes the PART as a column name,
-- unless it is within an EXTRACT() function.
CREATE OR REPLACE FUNCTION BIGQUERY_FORMAT_TO_PSQL(format_str VARCHAR(255)) RETURNS TEXT AS $$
BEGIN
RETURN
-- use replace to convert BigQuery string format to postgres string format
-- only handles a few cases since we don't extensively use this function
REPLACE(
REPLACE(
REPLACE(
REPLACE(
REPLACE(
REPLACE(
format_str
, '%S', 'SS'
)
, '%M', 'MI'
)
, '%H', 'HH24'
)
, '%d', 'dd'
)
, '%m', 'mm'
)
, '%Y', 'yyyy'
)
;
END; $$
LANGUAGE PLPGSQL;
CREATE OR REPLACE FUNCTION FORMAT_DATE(format_str VARCHAR(255), datetime_val TIMESTAMP(3)) RETURNS TEXT AS $$
BEGIN
RETURN TO_CHAR(
datetime_val,
-- use replace to convert BigQuery string format to postgres string format
-- only handles a few cases since we don't extensively use this function
BIGQUERY_FORMAT_TO_PSQL(format_str)
);
END; $$
LANGUAGE PLPGSQL;
CREATE OR REPLACE FUNCTION PARSE_DATE(format_str VARCHAR(255), string_val VARCHAR(255)) RETURNS DATE AS $$
BEGIN
RETURN TO_DATE(
string_val,
-- use replace to convert BigQuery string format to postgres string format
-- only handles a few cases since we don't extensively use this function
BIGQUERY_FORMAT_TO_PSQL(format_str)
);
END; $$
LANGUAGE PLPGSQL;
CREATE OR REPLACE FUNCTION FORMAT_DATETIME(format_str VARCHAR(255), datetime_val TIMESTAMP(3)) RETURNS TEXT AS $$
BEGIN
RETURN TO_CHAR(
datetime_val,
-- use replace to convert BigQuery string format to postgres string format
-- only handles a few cases since we don't extensively use this function
BIGQUERY_FORMAT_TO_PSQL(format_str)
);
END; $$
LANGUAGE PLPGSQL;
CREATE OR REPLACE FUNCTION PARSE_DATETIME(format_str VARCHAR(255), string_val VARCHAR(255)) RETURNS TIMESTAMP(3) AS $$
BEGIN
RETURN TO_TIMESTAMP(
string_val,
-- use replace to convert BigQuery string format to postgres string format
-- only handles a few cases since we don't extensively use this function
BIGQUERY_FORMAT_TO_PSQL(format_str)
);
END; $$
LANGUAGE PLPGSQL;