- def sql = "INSTALL parquet; LOAD parquet; COPY (WITH p AS (SELECT * FROM read_parquet('${parquet}/*.parquet')), s AS (SELECT unnest(string_to_array(sequence, '')) AS aa FROM p), h AS (SELECT unnest(map_entries(histogram(aa))) AS kv FROM s), e AS (SELECT * from read_csv_auto('amino_acid_properties.tsv')) SELECT h.kv['key'] AS amino_acid, h.kv['value'] AS count, e.* FROM h JOIN e ON h.kv['key'] = e.one_letter_symbol) TO '${prefix}.histogram.tsv' (HEADER, DELIMITER '\t')"
0 commit comments