diff --git a/docs/guides/examples/aggregate_function_combinators/anyIf.md b/docs/guides/examples/aggregate_function_combinators/anyIf.md new file mode 100644 index 00000000000..f3de0478ce9 --- /dev/null +++ b/docs/guides/examples/aggregate_function_combinators/anyIf.md @@ -0,0 +1,58 @@ +--- +slug: '/examples/aggregate-function-combinators/anyIf' +title: 'anyIf' +description: 'Example of using the anyIf combinator' +keywords: ['any', 'if', 'combinator', 'examples', 'anyIf'] +sidebar_label: 'anyIf' +--- + +# anyIf {#avgif} + +## Description {#description} + +The [`If`](/sql-reference/aggregate-functions/combinators#-if) combinator can be applied to the [`any`](/sql-reference/aggregate-functions/reference/any) +aggregate function to select the first encountered element from a given column +that matches the given condition. + +## Example Usage {#example-usage} + +In this example, we'll create a table that stores sales data with success flags, +and we'll use `anyIf` to select the first `transaction_id`s which are above and +below an amount of 200. + +We first create a table and insert data into it: + +```sql title="Query" +CREATE TABLE sales( + transaction_id UInt32, + amount Decimal(10,2), + is_successful UInt8 +) +ENGINE = MergeTree() +ORDER BY tuple(); + +INSERT INTO sales VALUES + (1, 100.00, 1), + (2, 150.00, 1), + (3, 155.00, 0), + (4, 300.00, 1), + (5, 250.50, 0), + (6, 175.25, 1); +``` + +```sql +SELECT + anyIf(transaction_id, amount < 200) as tid_lt_200, + anyIf(transaction_id, amount > 200) as tid_gt_200 +FROM sales; +``` + +```response title="Response" +┌─tid_lt_200─┬─tid_gt_200─┐ +│ 1 │ 4 │ +└────────────┴────────────┘ +``` + +## See also {#see-also} +- [`any`](/sql-reference/aggregate-functions/reference/any) +- [`If combinator`](/sql-reference/aggregate-functions/combinators#-if) diff --git a/docs/guides/examples/aggregate_function_combinators/argMaxIf.md b/docs/guides/examples/aggregate_function_combinators/argMaxIf.md new file mode 100644 index 00000000000..566f7ee29ab --- /dev/null +++ b/docs/guides/examples/aggregate_function_combinators/argMaxIf.md @@ -0,0 +1,61 @@ +--- +slug: '/examples/aggregate-function-combinators/argMaxIf' +title: 'argMaxIf' +description: 'Example of using the argMaxIf combinator' +keywords: ['argMax', 'if', 'combinator', 'examples', 'argMaxIf'] +sidebar_label: 'argMaxIf' +--- + +# argMaxIf {#argmaxif} + +## Description {#description} + +The [`If`](/sql-reference/aggregate-functions/combinators#-if) combinator can be applied to the [`argMax`](/sql-reference/aggregate-functions/reference/argmax) +function to find the value of `arg` that corresponds to the maximum value of `val` for rows where the condition is true, +using the `argMaxIf` aggregate combinator function. + +The `argMaxIf` function is useful when you need to find the value associated with +the maximum value in a dataset, but only for rows that satisfy a specific +condition. + +## Example Usage {#example-usage} + +In this example, we'll use a sample dataset of product sales to demonstrate how +`argMaxIf` works. We'll find the product name that has the highest price, but +only for products that have been sold at least 10 times. + +```sql title="Query" +CREATE TABLE product_sales +( + product_name String, + price Decimal32(2), + sales_count UInt32 +) ENGINE = Memory; + +INSERT INTO product_sales VALUES + ('Laptop', 999.99, 10), + ('Phone', 499.99, 15), + ('Tablet', 299.99, 0), + ('Watch', 199.99, 5), + ('Headphones', 79.99, 20); + +SELECT argMaxIf(product_name, price, sales_count >= 10) as most_expensive_popular_product +FROM product_sales; +``` + +The `argMaxIf` function will return the product name that has the highest price +among all products that have been sold at least 10 times (sales_count >= 10). +In this case, it will return 'Laptop' since it has the highest price (999.99) +among the popular products. + +```response title="Response" + ┌─most_expensi⋯lar_product─┐ +1. │ Laptop │ + └──────────────────────────┘ +``` + +## See also {#see-also} +- [`argMax`](/sql-reference/aggregate-functions/reference/argmax) +- [`argMin`](/sql-reference/aggregate-functions/reference/argmin) +- [`argMinIf`](/examples/aggregate-function-combinators/argMinIf) +- [`If combinator`](/sql-reference/aggregate-functions/combinators#-if) diff --git a/docs/guides/examples/aggregate_function_combinators/argMinIf.md b/docs/guides/examples/aggregate_function_combinators/argMinIf.md new file mode 100644 index 00000000000..76772f77c3c --- /dev/null +++ b/docs/guides/examples/aggregate_function_combinators/argMinIf.md @@ -0,0 +1,65 @@ +--- +slug: '/examples/aggregate-function-combinators/argMinIf' +title: 'argMinIf' +description: 'Example of using the argMinIf combinator' +keywords: ['argMin', 'if', 'combinator', 'examples', 'argMinIf'] +sidebar_label: 'argMinIf' +--- + +# argMinIf {#argminif} + +## Description {#description} + +The [`If`](/sql-reference/aggregate-functions/combinators#-if) combinator can be applied to the [`argMin`](/sql-reference/aggregate-functions/reference/argmin) +function to find the value of `arg` that corresponds to the minimum value of `val` for rows where the condition is true, +using the `argMinIf` aggregate combinator function. + +The `argMinIf` function is useful when you need to find the value associated +with the minimum value in a dataset, but only for rows that satisfy a specific +condition. + +## Example Usage {#example-usage} + +In this example, we'll create a table that stores product prices and their timestamps, +and we'll use `argMinIf` to find the lowest price for each product when it's in stock. + +```sql title="Query" +CREATE TABLE product_prices( + product_id UInt32, + price Decimal(10,2), + timestamp DateTime, + in_stock UInt8 +) ENGINE = Log; + +INSERT INTO product_prices VALUES + (1, 10.99, '2024-01-01 10:00:00', 1), + (1, 9.99, '2024-01-01 10:05:00', 1), + (1, 11.99, '2024-01-01 10:10:00', 0), + (2, 20.99, '2024-01-01 11:00:00', 1), + (2, 19.99, '2024-01-01 11:05:00', 1), + (2, 21.99, '2024-01-01 11:10:00', 1); + +SELECT + product_id, + argMinIf(price, timestamp, in_stock = 1) as lowest_price_when_in_stock +FROM product_prices +GROUP BY product_id; +``` + +The `argMinIf` function will find the price that corresponds to the earliest timestamp for each product, +but only considering rows where `in_stock = 1`. For example: +- Product 1: Among in-stock rows, 10.99 has the earliest timestamp (10:00:00) +- Product 2: Among in-stock rows, 20.99 has the earliest timestamp (11:00:00) + +```response title="Response" + ┌─product_id─┬─lowest_price_when_in_stock─┐ +1. │ 1 │ 10.99 │ +2. │ 2 │ 20.99 │ + └────────────┴────────────────────────────┘ +``` + +## See also {#see-also} +- [`argMin`](/sql-reference/aggregate-functions/reference/argmin) +- [`argMax`](/sql-reference/aggregate-functions/reference/argmax) +- [`argMaxIf`](/examples/aggregate-function-combinators/argMaxIf) +- [`If combinator`](/sql-reference/aggregate-functions/combinators#-if) diff --git a/docs/guides/examples/aggregate_function_combinators/avgIf.md b/docs/guides/examples/aggregate_function_combinators/avgIf.md new file mode 100644 index 00000000000..235f6150067 --- /dev/null +++ b/docs/guides/examples/aggregate_function_combinators/avgIf.md @@ -0,0 +1,53 @@ +--- +slug: '/examples/aggregate-function-combinators/avgIf' +title: 'avgIf' +description: 'Example of using the avgIf combinator' +keywords: ['avg', 'if', 'combinator', 'examples', 'avgIf'] +sidebar_label: 'avgIf' +--- + +# avgIf {#avgif} + +## Description {#description} + +The [`If`](/sql-reference/aggregate-functions/combinators#-if) combinator can be applied to the [`avg`](/sql-reference/aggregate-functions/reference/avg) +function to calculate the arithmetic mean of values for rows where the condition is true, +using the `avgIf` aggregate combinator function. + +## Example Usage {#example-usage} + +In this example, we'll create a table that stores sales data with success flags, +and we'll use `avgIf` to calculate the average sale amount for successful transactions. + +```sql title="Query" +CREATE TABLE sales( + transaction_id UInt32, + amount Decimal(10,2), + is_successful UInt8 +) ENGINE = Log; + +INSERT INTO sales VALUES + (1, 100.50, 1), + (2, 200.75, 1), + (3, 150.25, 0), + (4, 300.00, 1), + (5, 250.50, 0), + (6, 175.25, 1); + +SELECT + avgIf(amount, is_successful = 1) as avg_successful_sale +FROM sales; +``` + +The `avgIf` function will calculate the average amount only for rows where `is_successful = 1`. +In this case, it will average the amounts: 100.50, 200.75, 300.00, and 175.25. + +```response title="Response" + ┌─avg_successful_sale─┐ +1. │ 193.88 │ + └─────────────────────┘ +``` + +## See also {#see-also} +- [`avg`](/sql-reference/aggregate-functions/reference/avg) +- [`If combinator`](/sql-reference/aggregate-functions/combinators#-if) diff --git a/docs/guides/examples/aggregate_function_combinators/avgMap.md b/docs/guides/examples/aggregate_function_combinators/avgMap.md new file mode 100644 index 00000000000..1dcdbd0e51b --- /dev/null +++ b/docs/guides/examples/aggregate_function_combinators/avgMap.md @@ -0,0 +1,65 @@ +--- +slug: '/examples/aggregate-function-combinators/avgMap' +title: 'avgMap' +description: 'Example of using the avgMap combinator' +keywords: ['avg', 'map', 'combinator', 'examples', 'avgMap'] +sidebar_label: 'avgMap' +--- + +# avgMap {#avgmap} + +## Description {#description} + +The [`Map`](/sql-reference/aggregate-functions/combinators#-map) combinator can be applied to the [`avg`](/sql-reference/aggregate-functions/reference/avg) +function to calculate the arithmetic mean of values in a Map according to each key, using the `avgMap` +aggregate combinator function. + +## Example Usage {#example-usage} + +In this example, we'll create a table that stores status codes and their counts for different timeslots, +where each row contains a Map of status codes to their corresponding counts. We'll use +`avgMap` to calculate the average count for each status code within each timeslot. + +```sql title="Query" +CREATE TABLE metrics( + date Date, + timeslot DateTime, + status Map(String, UInt64) +) ENGINE = Log; + +INSERT INTO metrics VALUES + ('2000-01-01', '2000-01-01 00:00:00', (['a', 'b', 'c'], [15, 25, 35])), + ('2000-01-01', '2000-01-01 00:00:00', (['c', 'd', 'e'], [45, 55, 65])), + ('2000-01-01', '2000-01-01 00:01:00', (['d', 'e', 'f'], [75, 85, 95])), + ('2000-01-01', '2000-01-01 00:01:00', (['f', 'g', 'g'], [105, 115, 125])); + +SELECT + timeslot, + avgMap(status), +FROM metrics +GROUP BY timeslot; +``` + +The `avgMap` function will calculate the average count for each status code within each timeslot. For example: +- In timeslot '2000-01-01 00:00:00': + - Status 'a': 15 + - Status 'b': 25 + - Status 'c': (35 + 45) / 2 = 40 + - Status 'd': 55 + - Status 'e': 65 +- In timeslot '2000-01-01 00:01:00': + - Status 'd': 75 + - Status 'e': 85 + - Status 'f': (95 + 105) / 2 = 100 + - Status 'g': (115 + 125) / 2 = 120 + +```response title="Response" + ┌────────────timeslot─┬─avgMap(status)───────────────────────┐ +1. │ 2000-01-01 00:01:00 │ {'d':75,'e':85,'f':100,'g':120} │ +2. │ 2000-01-01 00:00:00 │ {'a':15,'b':25,'c':40,'d':55,'e':65} │ + └─────────────────────┴──────────────────────────────────────┘ +``` + +## See also {#see-also} +- [`avg`](/sql-reference/aggregate-functions/reference/avg) +- [`Map combinator`](/sql-reference/aggregate-functions/combinators#-map) diff --git a/docs/guides/examples/aggregate_function_combinators/avgMerge.md b/docs/guides/examples/aggregate_function_combinators/avgMerge.md new file mode 100644 index 00000000000..72fb2c9dbf5 --- /dev/null +++ b/docs/guides/examples/aggregate_function_combinators/avgMerge.md @@ -0,0 +1,26 @@ +--- +slug: '/examples/aggregate-function-combinators/avgMerge' +title: 'avgMerge' +description: 'Example of using the avgMerge combinator' +keywords: ['avg', 'merge', 'combinator', 'examples', 'avgMerge'] +sidebar_label: 'avgMerge' +--- + +# avgMerge {#avgMerge} + +## Description {#description} + +The [`Merge`](/sql-reference/aggregate-functions/combinators#-state) combinator +can be applied to the [`avg`](/sql-reference/aggregate-functions/reference/avg) +function to produce a final result by combining partial aggregate states. + +## Example Usage {#example-usage} + +The `Merge` combinator is closely related to the `State` combinator. Refer to +["avgState example usage"](/examples/aggregate-function-combinators/avgState/#example-usage) +for an example of both `avgMerge` and `avgState`. + +## See also {#see-also} +- [`avg`](/sql-reference/aggregate-functions/reference/avg) +- [`Merge`](/sql-reference/aggregate-functions/combinators#-merge) +- [`MergeState`](/sql-reference/aggregate-functions/combinators#-mergestate) diff --git a/docs/guides/examples/aggregate_function_combinators/avgMergeState.md b/docs/guides/examples/aggregate_function_combinators/avgMergeState.md new file mode 100644 index 00000000000..d4c132b16a3 --- /dev/null +++ b/docs/guides/examples/aggregate_function_combinators/avgMergeState.md @@ -0,0 +1,215 @@ +--- +slug: '/examples/aggregate-function-combinators/avgMergeState' +title: 'avgMergeState' +description: 'Example of using the avgMergeState combinator' +keywords: ['avg', 'MergeState', 'combinator', 'examples', 'avgMergeState'] +sidebar_label: 'avgMergeState' +--- + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +# avgMergeState {#avgMergeState} + +## Description {#description} + +The [`MergeState`](/sql-reference/aggregate-functions/combinators#-state) combinator +can be applied to the [`avg`](/sql-reference/aggregate-functions/reference/avg) +function to merge partial aggregate states of type `AverageFunction(avg, T)` and +return a new intermediate aggregation state. + +## Example Usage {#example-usage} + +The `MergeState` combinator is particularly useful for multi-level aggregation +scenarios where you want to combine pre-aggregated states and maintain them as +states (rather than finalizing them) for further processing. To illustrate, we'll +look at an example in which we transform individual server performance metrics +into hierarchical aggregations across multiple levels: Server level → Region level +→ Datacenter level. + +First we create a table to store the raw data: + +```sql +CREATE TABLE raw_server_metrics +( + timestamp DateTime DEFAULT now(), + server_id UInt32, + region String, + datacenter String, + response_time_ms UInt32 +) +ENGINE = MergeTree() +ORDER BY (region, server_id, timestamp); +``` + +We'll create a server-level aggregation target table and define an Incremental +Materialized View acting as an insert trigger to it: + +```sql +CREATE TABLE server_performance +( + server_id UInt32, + region String, + datacenter String, + avg_response_time AggregateFunction(avg, UInt32) +) +ENGINE = AggregatingMergeTree() +ORDER BY (region, server_id); + +CREATE MATERIALIZED VIEW server_performance_mv +TO server_performance +AS SELECT + server_id, + region, + datacenter, + avgState(response_time_ms) AS avg_response_time +FROM raw_server_metrics +GROUP BY server_id, region, datacenter; +``` + +We'll do the same for the regional and datacenter levels: + +```sql +CREATE TABLE region_performance +( + region String, + datacenter String, + avg_response_time AggregateFunction(avg, UInt32) +) +ENGINE = AggregatingMergeTree() +ORDER BY (datacenter, region); + +CREATE MATERIALIZED VIEW region_performance_mv +TO region_performance +AS SELECT + region, + datacenter, + avgMergeState(avg_response_time) AS avg_response_time +FROM server_performance +GROUP BY region, datacenter; + +-- datacenter level table and Materialized View + +CREATE TABLE datacenter_performance +( + datacenter String, + avg_response_time AggregateFunction(avg, UInt32) +) +ENGINE = AggregatingMergeTree() +ORDER BY datacenter; + +CREATE MATERIALIZED VIEW datacenter_performance_mv +TO datacenter_performance +AS SELECT + datacenter, + avgMergeState(avg_response_time) AS avg_response_time +FROM region_performance +GROUP BY datacenter; +``` + +We'll then insert sample raw data into the source table: + +```sql +INSERT INTO raw_server_metrics (timestamp, server_id, region, datacenter, response_time_ms) VALUES + (now(), 101, 'us-east', 'dc1', 120), + (now(), 101, 'us-east', 'dc1', 130), + (now(), 102, 'us-east', 'dc1', 115), + (now(), 201, 'us-west', 'dc1', 95), + (now(), 202, 'us-west', 'dc1', 105), + (now(), 301, 'eu-central', 'dc2', 145), + (now(), 302, 'eu-central', 'dc2', 155); +``` + +We'll write three queries for each of the levels: + + + +```sql +SELECT + server_id, + region, + avgMerge(avg_response_time) AS avg_response_ms +FROM server_performance +GROUP BY server_id, region +ORDER BY region, server_id; +``` +```response +┌─server_id─┬─region─────┬─avg_response_ms─┐ +│ 301 │ eu-central │ 145 │ +│ 302 │ eu-central │ 155 │ +│ 101 │ us-east │ 125 │ +│ 102 │ us-east │ 115 │ +│ 201 │ us-west │ 95 │ +│ 202 │ us-west │ 105 │ +└───────────┴────────────┴─────────────────┘ +``` + + +```sql +SELECT + region, + datacenter, + avgMerge(avg_response_time) AS avg_response_ms +FROM region_performance +GROUP BY region, datacenter +ORDER BY datacenter, region; +``` +```response +┌─region─────┬─datacenter─┬────avg_response_ms─┐ +│ us-east │ dc1 │ 121.66666666666667 │ +│ us-west │ dc1 │ 100 │ +│ eu-central │ dc2 │ 150 │ +└────────────┴────────────┴────────────────────┘ +``` + + +```sql +SELECT + datacenter, + avgMerge(avg_response_time) AS avg_response_ms +FROM datacenter_performance +GROUP BY datacenter +ORDER BY datacenter; +``` +```response +┌─datacenter─┬─avg_response_ms─┐ +│ dc1 │ 113 │ +│ dc2 │ 150 │ +└────────────┴─────────────────┘ +``` + + + +We can insert more data: + +```sql +INSERT INTO raw_server_metrics (timestamp, server_id, region, datacenter, response_time_ms) VALUES + (now(), 101, 'us-east', 'dc1', 140), + (now(), 201, 'us-west', 'dc1', 85), + (now(), 301, 'eu-central', 'dc2', 135); +``` + +Let's check the datacenter-level performance again. Notice how the entire +aggregation chain updated automatically: + +```sql +SELECT + datacenter, + avgMerge(avg_response_time) AS avg_response_ms +FROM datacenter_performance +GROUP BY datacenter +ORDER BY datacenter; +``` + +```response +┌─datacenter─┬────avg_response_ms─┐ +│ dc1 │ 112.85714285714286 │ +│ dc2 │ 145 │ +└────────────┴────────────────────┘ +``` + +## See also {#see-also} +- [`avg`](/sql-reference/aggregate-functions/reference/avg) +- [`AggregateFunction`](/sql-reference/data-types/aggregatefunction) +- [`Merge`](/sql-reference/aggregate-functions/combinators#-merge) +- [`MergeState`](/sql-reference/aggregate-functions/combinators#-mergestate) diff --git a/docs/guides/examples/aggregate_function_combinators/avgResample.md b/docs/guides/examples/aggregate_function_combinators/avgResample.md new file mode 100644 index 00000000000..029efdecdc4 --- /dev/null +++ b/docs/guides/examples/aggregate_function_combinators/avgResample.md @@ -0,0 +1,68 @@ +--- +slug: '/examples/aggregate-function-combinators/avgResample' +title: 'avgResample' +description: 'Example of using the Resample combinator with avg' +keywords: ['avg', 'Resample', 'combinator', 'examples', 'avgResample'] +sidebar_label: 'avgResample' +--- + +# countResample {#countResample} + +## Description {#description} + +The [`Resample`](/sql-reference/aggregate-functions/combinators#-resample) +combinator can be applied to the [`count`](/sql-reference/aggregate-functions/reference/count) +aggregate function to count values of a specified key column in a fixed number +of intervals (`N`). + +## Example Usage {#example-usage} + +### Basic example {#basic-example} + +Let's look at an example. We'll create a table which contains the `name`, `age` and +`wage` of employees, and we'll insert some data into it: + +```sql +CREATE TABLE employee_data +( + name String, + age UInt8, + wage Float32 +) +ENGINE = MergeTree() +ORDER BY tuple() + +INSERT INTO employee_data (name, age, wage) VALUES + ('John', 16, 10.0), + ('Alice', 30, 15.0), + ('Mary', 35, 8.0), + ('Evelyn', 48, 11.5), + ('David', 62, 9.9), + ('Brian', 60, 16.0); +``` + +Let's get the average wage of the people whose age lies in the intervals of `[30,60)` +and `[60,75)` (`[` is exclusive and `)` is inclusive). Since we use integer +representation for age, we get ages in the intervals `[30, 59]` and `[60,74]`. +To do so we apply the `Resample` combinator to the `avg` aggregate function. + +```sql +WITH avg_wage AS +( + SELECT avgResample(30, 75, 30)(wage, age) AS original_avg_wage + FROM employee_data +) +SELECT + arrayMap(x -> round(x, 3), original_avg_wage) AS avg_wage_rounded +FROM avg_wage; +``` + +```response +┌─avg_wage_rounded─┐ +│ [11.5,12.95] │ +└──────────────────┘ +``` + +## See also {#see-also} +- [`count`](/sql-reference/aggregate-functions/reference/count) +- [`Resample combinator`](/sql-reference/aggregate-functions/combinators#-resample) diff --git a/docs/guides/examples/aggregate_function_combinators/avgState.md b/docs/guides/examples/aggregate_function_combinators/avgState.md new file mode 100644 index 00000000000..fa6536ffb8e --- /dev/null +++ b/docs/guides/examples/aggregate_function_combinators/avgState.md @@ -0,0 +1,146 @@ +--- +slug: '/examples/aggregate-function-combinators/avgState' +title: 'avgState' +description: 'Example of using the avgState combinator' +keywords: ['avg', 'state', 'combinator', 'examples', 'avgState'] +sidebar_label: 'avgState' +--- + +# avgState {#avgState} + +## Description {#description} + +The [`State`](/sql-reference/aggregate-functions/combinators#-state) combinator +can be applied to the [`avg`](/sql-reference/aggregate-functions/reference/avg) +function to produce an intermediate state of `AggregateFunction(avg, T)` type where +`T` is the specified type for the average. + +## Example Usage {#example-usage} + +In this example, we'll look at how we can use the `AggregateFunction` type, +together with the `avgState` function to aggregate website traffic data. + +First create the source table for website traffic data: + +```sql +CREATE TABLE raw_page_views +( + page_id UInt32, + page_name String, + response_time_ms UInt32, -- Page response time in milliseconds + viewed_at DateTime DEFAULT now() +) +ENGINE = MergeTree() +ORDER BY (page_id, viewed_at); +``` + +Create the aggregate table that will store average response times. Note that +`avg` cannot use the `SimpleAggregateFunction` type as it requires a complex +state (a sum and a count). We therefore use the `AggregateFunction` type: + +```sql +CREATE TABLE page_performance +( + page_id UInt32, + page_name String, + avg_response_time AggregateFunction(avg, UInt32) -- Stores the state needed for avg calculation +) +ENGINE = AggregatingMergeTree() +ORDER BY page_id; +``` + +Create an Incremental Materialized View that will act as an insert trigger to +new data and store the intermediate state data in the target table defined above: + +```sql +CREATE MATERIALIZED VIEW page_performance_mv +TO page_performance +AS SELECT + page_id, + page_name, + avgState(response_time_ms) AS avg_response_time -- Using -State combinator +FROM raw_page_views +GROUP BY page_id, page_name; +``` + +Insert some initial data into the source table, creating a part on disk: + +```sql +INSERT INTO raw_page_views (page_id, page_name, response_time_ms) VALUES + (1, 'Homepage', 120), + (1, 'Homepage', 135), + (2, 'Products', 95), + (2, 'Products', 105), + (3, 'About', 80), + (3, 'About', 90); +``` + +Insert some more data to create a second part on disk: + +```sql +INSERT INTO raw_page_views (page_id, page_name, response_time_ms) VALUES +(1, 'Homepage', 150), +(2, 'Products', 110), +(3, 'About', 70), +(4, 'Contact', 60), +(4, 'Contact', 65); +``` + +Examine the target table `page_performance`: + +```sql +SELECT + page_id, + page_name, + avg_response_time, + toTypeName(avg_response_time) +FROM page_performance +``` + +```response +┌─page_id─┬─page_name─┬─avg_response_time─┬─toTypeName(avg_response_time)──┐ +│ 1 │ Homepage │ � │ AggregateFunction(avg, UInt32) │ +│ 2 │ Products │ � │ AggregateFunction(avg, UInt32) │ +│ 3 │ About │ � │ AggregateFunction(avg, UInt32) │ +│ 1 │ Homepage │ � │ AggregateFunction(avg, UInt32) │ +│ 2 │ Products │ n │ AggregateFunction(avg, UInt32) │ +│ 3 │ About │ F │ AggregateFunction(avg, UInt32) │ +│ 4 │ Contact │ } │ AggregateFunction(avg, UInt32) │ +└─────────┴───────────┴───────────────────┴────────────────────────────────┘ +``` + +Notice that the `avg_response_time` column is of type `AggregateFunction(avg, UInt32)` +and stores intermediate state information. Also notice that the row data for the +`avg_response_time` is not useful to us and we see strange text characters such +as `�, n, F, }`. This is the terminals attempt to display binary data as text. +The reason for this is that `AggregateFunction` types store their state in a +binary format that's optimized for efficient storage and computation, not for +human readability. This binary state contains all the information needed to +calculate the average. + +To make use of it, use the `Merge` combinator: + +```sql +SELECT + page_id, + page_name, + avgMerge(avg_response_time) AS average_response_time_ms +FROM page_performance +GROUP BY page_id, page_name +ORDER BY page_id; +``` + +Now we see the correct averages: + +```response +┌─page_id─┬─page_name─┬─average_response_time_ms─┐ +│ 1 │ Homepage │ 135 │ +│ 2 │ Products │ 103.33333333333333 │ +│ 3 │ About │ 80 │ +│ 4 │ Contact │ 62.5 │ +└─────────┴───────────┴──────────────────────────┘ +``` + +## See also {#see-also} +- [`avg`](/sql-reference/aggregate-functions/reference/avg) +- [`State`](/sql-reference/aggregate-functions/combinators#-state) diff --git a/docs/guides/examples/aggregate_function_combinators/countIf.md b/docs/guides/examples/aggregate_function_combinators/countIf.md new file mode 100644 index 00000000000..1bd7c343e85 --- /dev/null +++ b/docs/guides/examples/aggregate_function_combinators/countIf.md @@ -0,0 +1,55 @@ +--- +slug: '/examples/aggregate-function-combinators/countIf' +title: 'countIf' +description: 'Example of using the countIf combinator' +keywords: ['count', 'if', 'combinator', 'examples', 'countIf'] +sidebar_label: 'countIf' +--- + +# countIf {#countif} + +## Description {#description} + +The [`If`](/sql-reference/aggregate-functions/combinators#-if) combinator can be applied to the [`count`](/sql-reference/aggregate-functions/reference/count) +function to count the number of rows where the condition is true, +using the `countIf` aggregate combinator function. + +## Example Usage {#example-usage} + +In this example, we'll create a table that stores user login attempts, +and we'll use `countIf` to count the number of successful logins. + +```sql title="Query" +CREATE TABLE login_attempts( + user_id UInt32, + timestamp DateTime, + is_successful UInt8 +) ENGINE = Log; + +INSERT INTO login_attempts VALUES + (1, '2024-01-01 10:00:00', 1), + (1, '2024-01-01 10:05:00', 0), + (1, '2024-01-01 10:10:00', 1), + (2, '2024-01-01 11:00:00', 1), + (2, '2024-01-01 11:05:00', 1), + (2, '2024-01-01 11:10:00', 0); + +SELECT + user_id, + countIf(is_successful = 1) as successful_logins +FROM login_attempts +GROUP BY user_id; +``` + +The `countIf` function will count only the rows where `is_successful = 1` for each user. + +```response title="Response" + ┌─user_id─┬─successful_logins─┐ +1. │ 1 │ 2 │ +2. │ 2 │ 2 │ + └─────────┴───────────────────┘ +``` + +## See also {#see-also} +- [`count`](/sql-reference/aggregate-functions/reference/count) +- [`If combinator`](/sql-reference/aggregate-functions/combinators#-if) diff --git a/docs/guides/examples/aggregate_function_combinators/countResample.md b/docs/guides/examples/aggregate_function_combinators/countResample.md new file mode 100644 index 00000000000..c20f0aca74d --- /dev/null +++ b/docs/guides/examples/aggregate_function_combinators/countResample.md @@ -0,0 +1,61 @@ +--- +slug: '/examples/aggregate-function-combinators/countResample' +title: 'countResample' +description: 'Example of using the Resample combinator with count' +keywords: ['count', 'Resample', 'combinator', 'examples', 'countResample'] +sidebar_label: 'countResample' +--- + +# countResample {#countResample} + +## Description {#description} + +The [`Resample`](/sql-reference/aggregate-functions/combinators#-resample) +combinator can be applied to the [`count`](/sql-reference/aggregate-functions/reference/count) +aggregate function to count values of a specified key column in a fixed number +of intervals (`N`). + +## Example Usage {#example-usage} + +### Basic example {#basic-example} + +Let's look at an example. We'll create a table which contains the `name`, `age` and +`wage` of employees, and we'll insert some data into it: + +```sql +CREATE TABLE employee_data +( + name String, + age UInt8, + wage Float32 +) +ENGINE = MergeTree() +ORDER BY tuple() + +INSERT INTO employee_data (name, age, wage) VALUES + ('John', 16, 10.0), + ('Alice', 30, 15.0), + ('Mary', 35, 8.0), + ('Evelyn', 48, 11.5), + ('David', 62, 9.9), + ('Brian', 60, 16.0); +``` + +Let's count all the people whose age lies in the intervals of `[30,60)` +and `[60,75)`. Since we use integer representation for age, we get ages in the +`[30, 59]` and `[60,74]` intervals. To do so we apply the `Resample` combinator +to `count` + +```sql +SELECT countResample(30, 75, 30)(name, age) AS amount FROM employee_data +``` + +```response +┌─amount─┐ +│ [3,2] │ +└────────┘ +``` + +## See also {#see-also} +- [`count`](/sql-reference/aggregate-functions/reference/count) +- [`Resample combinator`](/sql-reference/aggregate-functions/combinators#-resample) diff --git a/docs/guides/examples/aggregate_function_combinators/groupArrayDistinct.md b/docs/guides/examples/aggregate_function_combinators/groupArrayDistinct.md new file mode 100644 index 00000000000..fd98073b543 --- /dev/null +++ b/docs/guides/examples/aggregate_function_combinators/groupArrayDistinct.md @@ -0,0 +1,39 @@ +--- +slug: '/examples/aggregate-function-combinators/groupArrayDistinct' +title: 'groupArrayDistinct' +description: 'Example of using the groupArrayDistinct combinator' +keywords: ['groupArray', 'Distinct', 'combinator', 'examples', 'groupArrayDistinct'] +sidebar_label: 'groupArrayDistinct' +--- + +# groupArrayDistinct {#sumdistinct} + +## Description {#description} + +The [`groupArrayDistinct`](/sql-reference/aggregate-functions/combinators#-foreach) combinator +can be applied to the [`groupArray`](/sql-reference/aggregate-functions/reference/sum) aggregate function to create an array +of distinct argument values. + +## Example Usage {#example-usage} + +For this example we'll make use of the `hits` dataset available in our [SQL playground](https://sql.clickhouse.com/). + +Imagine you want to find out, for each distinct landing page domain (`URLDomain`) +on your website, what are all the unique User Agent OS codes (`OS`) recorded for +visitors landing on that domain. This could help you understand the variety of +operating systems interacting with different parts of your site. + +```sql runnable +SELECT + URLDomain, + groupArrayDistinct(OS) AS distinct_os_codes +FROM metrica.hits_v1 +WHERE URLDomain != '' -- Consider only hits with a recorded domain +GROUP BY URLDomain +ORDER BY URLDomain ASC +LIMIT 20; +``` + +## See also {#see-also} +- [`groupArray`](/sql-reference/aggregate-functions/reference/grouparray) +- [`Distinct combinator`](/sql-reference/aggregate-functions/combinators#-distinct) diff --git a/docs/guides/examples/aggregate_function_combinators/groupArrayResample.md b/docs/guides/examples/aggregate_function_combinators/groupArrayResample.md new file mode 100644 index 00000000000..9abea133bb0 --- /dev/null +++ b/docs/guides/examples/aggregate_function_combinators/groupArrayResample.md @@ -0,0 +1,65 @@ +--- +slug: '/examples/aggregate-function-combinators/groupArrayResample' +title: 'groupArrayResample' +description: 'Example of using the Resample combinator with groupArray' +keywords: ['groupArray', 'Resample', 'combinator', 'examples', 'groupArrayResample'] +sidebar_label: 'groupArrayResample' +--- + +# groupArrayResample {#grouparrayresample} + +## Description {#description} + +The [`Resample`](/sql-reference/aggregate-functions/combinators#-resample) +combinator can be applied to the [`groupArray`](/sql-reference/aggregate-functions/reference/sum) aggregate function to +divide the range of a specified key column into a fixed number of intervals (`N`) +and construct the resulting array by selecting one representative value +(corresponding to the minimum key) from the data points falling into each interval. +It creates a downsampled view of the data rather than collecting all values. + +## Example Usage {#example-usage} + +Let's look at an example. We'll create a table which contains the `name`, `age` and +`wage` of employees, and we'll insert some data into it: + +```sql +CREATE TABLE employee_data +( + name String, + age UInt8, + wage Float32 +) ENGINE = MergeTree() +ORDER BY tuple() + +INSERT INTO employee_data (name, age, wage) VALUES + ('John', 16, 10.0), + ('Alice', 30, 15.0), + ('Mary', 35, 8.0), + ('Evelyn', 48, 11.5), + ('David', 62, 9.9), + ('Brian', 60, 16.0); +``` + +Let's get the names of the people whose age lies in the intervals of `[30,60)` +and `[60,75)`. Since we use integer representation for age, we get ages in the +`[30, 59]` and `[60,74]` intervals. + +To aggregate names in an array, we use the `groupArray` aggregate function. +It takes one argument. In our case, it's the name column. The `groupArrayResample` +function should use the age column to aggregate names by age. To define the +required intervals, we pass `30`, `75`, `30` as arguments into the `groupArrayResample` +function: + +```sql +SELECT groupArrayResample(30, 75, 30)(name, age) FROM employee_data +``` + +```response +┌─groupArrayResample(30, 75, 30)(name, age)─────┐ +│ [['Alice','Mary','Evelyn'],['David','Brian']] │ +└───────────────────────────────────────────────┘ +``` + +## See also {#see-also} +- [`groupArray`](/sql-reference/aggregate-functions/reference/grouparray) +- [`Resample combinator`](/sql-reference/aggregate-functions/combinators#-resample) diff --git a/docs/guides/examples/aggregate_function_combinators/maxMap.md b/docs/guides/examples/aggregate_function_combinators/maxMap.md new file mode 100644 index 00000000000..dd2e524ac04 --- /dev/null +++ b/docs/guides/examples/aggregate_function_combinators/maxMap.md @@ -0,0 +1,65 @@ +--- +slug: '/examples/aggregate-function-combinators/maxMap' +title: 'maxMap' +description: 'Example of using the maxMap combinator' +keywords: ['max', 'map', 'combinator', 'examples', 'maxMap'] +sidebar_label: 'maxMap' +--- + +# maxMap {#maxmap} + +## Description {#description} + +The [`Map`](/sql-reference/aggregate-functions/combinators#-map) combinator can be applied to the [`max`](/sql-reference/aggregate-functions/reference/max) +function to calculate the maximum value in a Map according to each key, using the `maxMap` +aggregate combinator function. + +## Example Usage {#example-usage} + +In this example, we'll create a table that stores status codes and their counts for different timeslots, +where each row contains a Map of status codes to their corresponding counts. We'll use +`maxMap` to find the maximum count for each status code within each timeslot. + +```sql title="Query" +CREATE TABLE metrics( + date Date, + timeslot DateTime, + status Map(String, UInt64) +) ENGINE = Log; + +INSERT INTO metrics VALUES + ('2000-01-01', '2000-01-01 00:00:00', (['a', 'b', 'c'], [15, 25, 35])), + ('2000-01-01', '2000-01-01 00:00:00', (['c', 'd', 'e'], [45, 55, 65])), + ('2000-01-01', '2000-01-01 00:01:00', (['d', 'e', 'f'], [75, 85, 95])), + ('2000-01-01', '2000-01-01 00:01:00', (['f', 'g', 'g'], [105, 115, 125])); + +SELECT + timeslot, + maxMap(status), +FROM metrics +GROUP BY timeslot; +``` + +The `maxMap` function will find the maximum count for each status code within each timeslot. For example: +- In timeslot '2000-01-01 00:00:00': + - Status 'a': 15 + - Status 'b': 25 + - Status 'c': max(35, 45) = 45 + - Status 'd': 55 + - Status 'e': 65 +- In timeslot '2000-01-01 00:01:00': + - Status 'd': 75 + - Status 'e': 85 + - Status 'f': max(95, 105) = 105 + - Status 'g': max(115, 125) = 125 + +```response title="Response" + ┌────────────timeslot─┬─maxMap(status)───────────────────────┐ +1. │ 2000-01-01 00:01:00 │ {'d':75,'e':85,'f':105,'g':125} │ +2. │ 2000-01-01 00:00:00 │ {'a':15,'b':25,'c':45,'d':55,'e':65} │ + └─────────────────────┴──────────────────────────────────────┘ +``` + +## See also {#see-also} +- [`max`](/sql-reference/aggregate-functions/reference/max) +- [`Map combinator`](/sql-reference/aggregate-functions/combinators#-map) diff --git a/docs/guides/examples/aggregate_function_combinators/maxSimpleState.md b/docs/guides/examples/aggregate_function_combinators/maxSimpleState.md new file mode 100644 index 00000000000..2b758124b06 --- /dev/null +++ b/docs/guides/examples/aggregate_function_combinators/maxSimpleState.md @@ -0,0 +1,25 @@ +--- +slug: '/examples/aggregate-function-combinators/maxSimpleState' +title: 'maxSimpleState' +description: 'Example of using the minSimpleState combinator' +keywords: ['min', 'state', 'simple', 'combinator', 'examples', 'minSimpleState'] +sidebar_label: 'minSimpleState' +--- + +# minSimpleState {#minsimplestate} + +## Description {#description} + +The [`SimpleState`](/sql-reference/aggregate-functions/combinators#-simplestate) combinator can be applied to the [`max`](/sql-reference/aggregate-functions/reference/max) +function to return the maximum value across all input values. It returns the +result with type `SimpleAggregateState`. + +## Example Usage {#example-usage} + +The example given in [`minSimpleState`](/examples/aggregate-function-combinators/minSimpleState/#example-usage) +demonstrates a usage of both `maxSimpleState` and `minSimpleState`. + +## See also {#see-also} +- [`max`](/sql-reference/aggregate-functions/reference/max) +- [`SimpleState combinator`](/sql-reference/aggregate-functions/combinators#-simplestate) +- [`SimpleAggregateFunction type`](/sql-reference/data-types/simpleaggregatefunction) diff --git a/docs/guides/examples/aggregate_function_combinators/minMap.md b/docs/guides/examples/aggregate_function_combinators/minMap.md new file mode 100644 index 00000000000..e72d0d86954 --- /dev/null +++ b/docs/guides/examples/aggregate_function_combinators/minMap.md @@ -0,0 +1,65 @@ +--- +slug: '/examples/aggregate-function-combinators/minMap' +title: 'minMap' +description: 'Example of using the minMap combinator' +keywords: ['min', 'map', 'combinator', 'examples', 'minMap'] +sidebar_label: 'minMap' +--- + +# minMap {#minmap} + +## Description {#description} + +The [`Map`](/sql-reference/aggregate-functions/combinators#-map) combinator can be applied to the [`min`](/sql-reference/aggregate-functions/reference/min) +function to calculate the minimum value in a Map according to each key, using the `minMap` +aggregate combinator function. + +## Example Usage {#example-usage} + +In this example, we'll create a table that stores status codes and their counts for different timeslots, +where each row contains a Map of status codes to their corresponding counts. We'll use +`minMap` to find the minimum count for each status code within each timeslot. + +```sql title="Query" +CREATE TABLE metrics( + date Date, + timeslot DateTime, + status Map(String, UInt64) +) ENGINE = Log; + +INSERT INTO metrics VALUES + ('2000-01-01', '2000-01-01 00:00:00', (['a', 'b', 'c'], [15, 25, 35])), + ('2000-01-01', '2000-01-01 00:00:00', (['c', 'd', 'e'], [45, 55, 65])), + ('2000-01-01', '2000-01-01 00:01:00', (['d', 'e', 'f'], [75, 85, 95])), + ('2000-01-01', '2000-01-01 00:01:00', (['f', 'g', 'g'], [105, 115, 125])); + +SELECT + timeslot, + minMap(status), +FROM metrics +GROUP BY timeslot; +``` + +The `minMap` function will find the minimum count for each status code within each timeslot. For example: +- In timeslot '2000-01-01 00:00:00': + - Status 'a': 15 + - Status 'b': 25 + - Status 'c': min(35, 45) = 35 + - Status 'd': 55 + - Status 'e': 65 +- In timeslot '2000-01-01 00:01:00': + - Status 'd': 75 + - Status 'e': 85 + - Status 'f': min(95, 105) = 95 + - Status 'g': min(115, 125) = 115 + +```response title="Response" + ┌────────────timeslot─┬─minMap(status)───────────────────────┐ +1. │ 2000-01-01 00:01:00 │ {'d':75,'e':85,'f':95,'g':115} │ +2. │ 2000-01-01 00:00:00 │ {'a':15,'b':25,'c':35,'d':55,'e':65} │ + └─────────────────────┴──────────────────────────────────────┘ +``` + +## See also {#see-also} +- [`min`](/sql-reference/aggregate-functions/reference/min) +- [`Map combinator`](/sql-reference/aggregate-functions/combinators#-map) diff --git a/docs/guides/examples/aggregate_function_combinators/minSimpleState.md b/docs/guides/examples/aggregate_function_combinators/minSimpleState.md new file mode 100644 index 00000000000..fdad0a2e374 --- /dev/null +++ b/docs/guides/examples/aggregate_function_combinators/minSimpleState.md @@ -0,0 +1,169 @@ +--- +slug: '/examples/aggregate-function-combinators/minSimpleState' +title: 'minSimpleState' +description: 'Example of using the minSimpleState combinator' +keywords: ['min', 'state', 'simple', 'combinator', 'examples', 'minSimpleState'] +sidebar_label: 'minSimpleState' +--- + +# minSimpleState {#minsimplestate} + +## Description {#description} + +The [`SimpleState`](/sql-reference/aggregate-functions/combinators#-simplestate) combinator can be applied to the [`min`](/sql-reference/aggregate-functions/reference/min) +function to return the minimum value across all input values. It returns the +result with type [`SimpleAggregateFunction`](/docs/sql-reference/data-types/simpleaggregatefunction). + +## Example Usage {#example-usage} + +Let's look at a practical example using a table that tracks daily temperature +readings. For each location, we want to maintain the lowest temperature recorded. +Using the `SimpleAggregateFunction` type with `min` automatically updates the +stored value when a lower temperature is encountered. + +Create the source table for raw temperature readings: + +```sql +CREATE TABLE raw_temperature_readings +( + location_id UInt32, + location_name String, + temperature Int32, + recorded_at DateTime DEFAULT now() +) + ENGINE = MergeTree() +ORDER BY (location_id, recorded_at); +``` + +Create the aggregate table that will store the min temperatures: + +```sql +CREATE TABLE temperature_extremes +( + location_id UInt32, + location_name String, + min_temp SimpleAggregateFunction(min, Int32), -- Stores minimum temperature + max_temp SimpleAggregateFunction(max, Int32) -- Stores maximum temperature +) +ENGINE = AggregatingMergeTree() +ORDER BY location_id; +``` + +Create an Incremental Materialized View that will act as an insert trigger +for inserted data and maintains the minimum, maximum temperatures per location. + +```sql +CREATE MATERIALIZED VIEW temperature_extremes_mv +TO temperature_extremes +AS SELECT + location_id, + location_name, + minSimpleState(temperature) AS min_temp, -- Using SimpleState combinator + maxSimpleState(temperature) AS max_temp -- Using SimpleState combinator +FROM raw_temperature_readings +GROUP BY location_id, location_name; +``` + +Insert some initial temperature readings: + +```sql +INSERT INTO raw_temperature_readings (location_id, location_name, temperature) VALUES +(1, 'North', 5), +(2, 'South', 15), +(3, 'West', 10), +(4, 'East', 8); +``` + +These readings are automatically processed by the Materialized View. Let's check +the current state: + +```sql +SELECT + location_id, + location_name, + min_temp, -- Directly accessing the SimpleAggregateFunction values + max_temp -- No need for finalization function with SimpleAggregateFunction +FROM temperature_extremes +ORDER BY location_id; +``` + +```response +┌─location_id─┬─location_name─┬─min_temp─┬─max_temp─┐ +│ 1 │ North │ 5 │ 5 │ +│ 2 │ South │ 15 │ 15 │ +│ 3 │ West │ 10 │ 10 │ +│ 4 │ East │ 8 │ 8 │ +└─────────────┴───────────────┴──────────┴──────────┘ +``` + +Insert some more data: + +```sql +INSERT INTO raw_temperature_readings (location_id, location_name, temperature) VALUES + (1, 'North', 3), + (2, 'South', 18), + (3, 'West', 10), + (1, 'North', 8), + (4, 'East', 2); +``` + +View the updated extremes after new data: + +```sql +SELECT + location_id, + location_name, + min_temp, + max_temp +FROM temperature_extremes +ORDER BY location_id; +``` + +```response +┌─location_id─┬─location_name─┬─min_temp─┬─max_temp─┐ +│ 1 │ North │ 3 │ 8 │ +│ 1 │ North │ 5 │ 5 │ +│ 2 │ South │ 18 │ 18 │ +│ 2 │ South │ 15 │ 15 │ +│ 3 │ West │ 10 │ 10 │ +│ 3 │ West │ 10 │ 10 │ +│ 4 │ East │ 2 │ 2 │ +│ 4 │ East │ 8 │ 8 │ +└─────────────┴───────────────┴──────────┴──────────┘ +``` + +Notice above that we have two inserted values for each location. This is because +parts have not yet been merged (and aggregated by `AggregatingMergeTree`). To get +the final result from the partial states we need to add a `GROUP BY`: + +```sql +SELECT + location_id, + location_name, + min(min_temp) AS min_temp, -- Aggregate across all parts + max(max_temp) AS max_temp -- Aggregate across all parts +FROM temperature_extremes +GROUP BY location_id, location_name +ORDER BY location_id; +``` + +We now get the expected result: + +```sql +┌─location_id─┬─location_name─┬─min_temp─┬─max_temp─┐ +│ 1 │ North │ 3 │ 8 │ +│ 2 │ South │ 15 │ 18 │ +│ 3 │ West │ 10 │ 10 │ +│ 4 │ East │ 2 │ 8 │ +└─────────────┴───────────────┴──────────┴──────────┘ +``` + +:::note +With `SimpleState`, you do not need to use the `Merge` combinator to combine +partial aggregation states. +::: + +## See also {#see-also} +- [`min`](/sql-reference/aggregate-functions/reference/min) +- [`SimpleState combinator`](/sql-reference/aggregate-functions/combinators#-simplestate) +- [`SimpleAggregateFunction type`](/sql-reference/data-types/simpleaggregatefunction) diff --git a/docs/guides/examples/aggregate_function_combinators/quantilesTimingArrayIf.md b/docs/guides/examples/aggregate_function_combinators/quantilesTimingArrayIf.md new file mode 100644 index 00000000000..651734121f2 --- /dev/null +++ b/docs/guides/examples/aggregate_function_combinators/quantilesTimingArrayIf.md @@ -0,0 +1,62 @@ +--- +slug: '/examples/aggregate-function-combinators/quantilesTimingArrayIf' +title: 'quantilesTimingArrayIf' +description: 'Example of using the quantilesTimingArrayIf combinator' +keywords: ['quantilesTiming', 'array', 'if', 'combinator', 'examples', 'quantilesTimingArrayIf'] +sidebar_label: 'quantilesTimingArrayIf' +--- + +# quantilesTimingArrayIf {#quantilestimingarrayif} + +## Description {#description} + +The [`Array`](/sql-reference/aggregate-functions/combinators#-array) and [`If`](/sql-reference/aggregate-functions/combinators#-if) +combinator can be applied to the [`quantilesTiming`](/sql-reference/aggregate-functions/reference/quantiletiming) +function to calculate quantiles of timing values in arrays for rows where the condition is true, +using the `quantilesTimingArrayIf` aggregate combinator function. + +## Example Usage {#example-usage} + +In this example, we'll create a table that stores API response times for different endpoints, +and we'll use `quantilesTimingArrayIf` to calculate response time quantiles for successful requests. + +```sql title="Query" +CREATE TABLE api_responses( + endpoint String, + response_times_ms Array(UInt32), + success_rate Float32 +) ENGINE = Log; + +INSERT INTO api_responses VALUES + ('orders', [82, 94, 98, 87, 103, 92, 89, 105], 0.98), + ('products', [45, 52, 48, 51, 49, 53, 47, 50], 0.95), + ('users', [120, 125, 118, 122, 121, 119, 123, 124], 0.92); + +SELECT + endpoint, + quantilesTimingArrayIf(0, 0.25, 0.5, 0.75, 0.95, 0.99, 1.0)(response_times_ms, success_rate >= 0.95) as response_time_quantiles +FROM api_responses +GROUP BY endpoint; +``` + +The `quantilesTimingArrayIf` function will calculate quantiles only for endpoints with a success rate above 95%. +The returned array contains the following quantiles in order: +- 0 (minimum) +- 0.25 (first quartile) +- 0.5 (median) +- 0.75 (third quartile) +- 0.95 (95th percentile) +- 0.99 (99th percentile) +- 1.0 (maximum) + +```response title="Response" + ┌─endpoint─┬─response_time_quantiles─────────────────────────────────────────────┐ +1. │ orders │ [82, 87, 92, 98, 103, 104, 105] │ +2. │ products │ [45, 47, 49, 51, 52, 52, 53] │ +3. │ users │ [nan, nan, nan, nan, nan, nan, nan] │ + └──────────┴─────────────────────────────────────────────────────────────────────┘ +``` + +## See also {#see-also} +- [`quantilesTiming`](/sql-reference/aggregate-functions/reference/quantiletiming) +- [`If combinator`](/sql-reference/aggregate-functions/combinators#-if) diff --git a/docs/guides/examples/aggregate_function_combinators/quantilesTimingIf.md b/docs/guides/examples/aggregate_function_combinators/quantilesTimingIf.md new file mode 100644 index 00000000000..6d33707032c --- /dev/null +++ b/docs/guides/examples/aggregate_function_combinators/quantilesTimingIf.md @@ -0,0 +1,82 @@ +--- +slug: '/examples/aggregate-function-combinators/quantilesTimingIf' +title: 'quantilesTimingIf' +description: 'Example of using the quantilesTimingIf combinator' +keywords: ['quantilesTiming', 'if', 'combinator', 'examples', 'quantilesTimingIf'] +sidebar_label: 'quantilesTimingIf' +--- + +# quantilesTimingIf {#quantilestimingif} + +## Description {#description} + +The [`If`](/sql-reference/aggregate-functions/combinators#-if) combinator can be applied to the [`quantilesTiming`](/sql-reference/aggregate-functions/reference/quantiletiming) +function to calculate quantiles of timing values for rows where the condition is true, +using the `quantilesTimingIf` aggregate combinator function. + +## Example Usage {#example-usage} + +In this example, we'll create a table that stores API response times for different endpoints, +and we'll use `quantilesTimingIf` to calculate response time quantiles for successful requests. + +```sql title="Query" +CREATE TABLE api_responses( + endpoint String, + response_time_ms UInt32, + is_successful UInt8 +) ENGINE = Log; + +INSERT INTO api_responses VALUES + ('orders', 82, 1), + ('orders', 94, 1), + ('orders', 98, 1), + ('orders', 87, 1), + ('orders', 103, 1), + ('orders', 92, 1), + ('orders', 89, 1), + ('orders', 105, 1), + ('products', 45, 1), + ('products', 52, 1), + ('products', 48, 1), + ('products', 51, 1), + ('products', 49, 1), + ('products', 53, 1), + ('products', 47, 1), + ('products', 50, 1), + ('users', 120, 0), + ('users', 125, 0), + ('users', 118, 0), + ('users', 122, 0), + ('users', 121, 0), + ('users', 119, 0), + ('users', 123, 0), + ('users', 124, 0); + +SELECT + endpoint, + quantilesTimingIf(0, 0.25, 0.5, 0.75, 0.95, 0.99, 1.0)(response_time_ms, is_successful = 1) as response_time_quantiles +FROM api_responses +GROUP BY endpoint; +``` + +The `quantilesTimingIf` function will calculate quantiles only for successful requests (is_successful = 1). +The returned array contains the following quantiles in order: +- 0 (minimum) +- 0.25 (first quartile) +- 0.5 (median) +- 0.75 (third quartile) +- 0.95 (95th percentile) +- 0.99 (99th percentile) +- 1.0 (maximum) + +```response title="Response" + ┌─endpoint─┬─response_time_quantiles─────────────────────────────────────────────┐ +1. │ orders │ [82, 87, 92, 98, 103, 104, 105] │ +2. │ products │ [45, 47, 49, 51, 52, 52, 53] │ +3. │ users │ [nan, nan, nan, nan, nan, nan, nan] │ + └──────────┴─────────────────────────────────────────────────────────────────────┘ +``` + +## See also {#see-also} +- [`quantilesTiming`](/sql-reference/aggregate-functions/reference/quantiletiming) +- [`If combinator`](/sql-reference/aggregate-functions/combinators#-if) diff --git a/docs/guides/examples/aggregate_function_combinators/sumArray.md b/docs/guides/examples/aggregate_function_combinators/sumArray.md new file mode 100644 index 00000000000..81417956ee3 --- /dev/null +++ b/docs/guides/examples/aggregate_function_combinators/sumArray.md @@ -0,0 +1,56 @@ +--- +slug: '/examples/aggregate-function-combinators/sumArray' +title: 'sumArray' +description: 'Example of using the sumArray combinator' +keywords: ['sum', 'array', 'combinator', 'examples', 'sumArray'] +sidebar_label: 'sumArray' +--- + +# sumArray {#sumarray} + +## Description {#description} + +The [`Array`](/sql-reference/aggregate-functions/combinators#-array) combinator +can be applied to the [`sum`](/sql-reference/aggregate-functions/reference/sum) +function to calculate the sum of all elements in an array, using the `sumArray` +aggregate combinator function. + +The `sumArray` function is useful when you need to calculate the total sum of +all elements across multiple arrays in a dataset. + +## Example Usage {#example-usage} + +In this example, we'll use a sample dataset of daily sales across different +product categories to demonstrate how `sumArray` works. We'll calculate the total +sales across all categories for each day. + +```sql title="Query" +CREATE TABLE daily_category_sales +( + date Date, + category_sales Array(UInt32) +) ENGINE = Memory; + +INSERT INTO daily_category_sales VALUES + ('2024-01-01', [100, 200, 150]), + ('2024-01-02', [120, 180, 160]), + ('2024-01-03', [90, 220, 140]); + +SELECT + date, + category_sales, + sumArray(category_sales) as total_sales_sumArray, + sum(arraySum(category_sales)) as total_sales_arraySum +FROM daily_category_sales +GROUP BY date, category_sales; +``` + +The `sumArray` function will sum up all elements in each `category_sales` array. +For example, on `2024-01-01`, it sums `100 + 200 + 150 = 450`. This gives the +same result as `arraySum`. + +## See also {#see-also} +- [`sum`](/sql-reference/aggregate-functions/reference/sum) +- [`arraySum`](/sql-reference/functions/array-functions#arraysum) +- [`Array combinator`](/sql-reference/aggregate-functions/combinators#-array) +- [`sumMap`](/examples/aggregate-function-combinators/sumMap) diff --git a/docs/guides/examples/aggregate_function_combinators/sumForEach.md b/docs/guides/examples/aggregate_function_combinators/sumForEach.md new file mode 100644 index 00000000000..1b3be84900e --- /dev/null +++ b/docs/guides/examples/aggregate_function_combinators/sumForEach.md @@ -0,0 +1,48 @@ +--- +slug: '/examples/aggregate-function-combinators/sumForEach' +title: 'sumForEach' +description: 'Example of using the sumArray combinator' +keywords: ['sum', 'array', 'combinator', 'examples', 'sumArray'] +sidebar_label: 'sumArray' +--- + +# sumArray {#sumforeach} + +## Description {#description} + +The [`ForEach`](/sql-reference/aggregate-functions/combinators#-foreach) combinator +can be applied to the [`sum`](/sql-reference/aggregate-functions/reference/sum) aggregate function to turn it from an aggregate +function which operates on row values to an aggregate function which operates on +array columns, applying the aggregate to each element in the array across rows. + +## Example Usage {#example-usage} + +For this example we'll make use of the `hits` dataset available in our [SQL playground](https://sql.clickhouse.com/). + +The `hits` table contains a column called `isMobile` of type UInt8 which can be +`0` for Desktop or `1` for mobile: + +```sql runnable +SELECT EventTime, IsMobile FROM metrica.hits ORDER BY rand() LIMIT 10 +``` + +We'll use the `sumForEach` aggregate combinator function to analyze how +desktop versus mobile traffic varies by hour of the day. Click the play button +below to run the query interactively: + +```sql runnable +SELECT + toHour(EventTime) AS hour_of_day, + -- Use sumForEach to count desktop and mobile visits in one pass + sumForEach([ + IsMobile = 0, -- Desktop visits (IsMobile = 0) + IsMobile = 1 -- Mobile visits (IsMobile = 1) + ]) AS device_counts +FROM metrica.hits +GROUP BY hour_of_day +ORDER BY hour_of_day; +``` + +## See also {#see-also} +- [`sum`](/sql-reference/aggregate-functions/reference/sum) +- [`ForEach combinator`](/sql-reference/aggregate-functions/combinators#-foreach) diff --git a/docs/guides/examples/aggregate_function_combinators/sumIf.md b/docs/guides/examples/aggregate_function_combinators/sumIf.md new file mode 100644 index 00000000000..324868f6344 --- /dev/null +++ b/docs/guides/examples/aggregate_function_combinators/sumIf.md @@ -0,0 +1,125 @@ +--- +slug: '/examples/aggregate-function-combinators/sumIf' +title: 'sumIf' +description: 'Example of using the sumIf combinator' +keywords: ['sum', 'if', 'combinator', 'examples', 'sumIf'] +sidebar_label: 'sumIf' +--- + +# sumIf {#sumif} + +## Description {#description} + +The [`If`](/sql-reference/aggregate-functions/combinators#-if) combinator can be applied to the [`sum`](/sql-reference/aggregate-functions/reference/sum) +function to calculate the sum of values for rows where the condition is true, +using the `sumIf` aggregate combinator function. + +## Example Usage {#example-usage} + +In this example, we'll create a table that stores sales data with success flags, +and we'll use `sumIf` to calculate the total sales amount for successful transactions. + +```sql title="Query" +CREATE TABLE sales( + transaction_id UInt32, + amount Decimal(10,2), + is_successful UInt8 +) ENGINE = Log; + +INSERT INTO sales VALUES + (1, 100.50, 1), + (2, 200.75, 1), + (3, 150.25, 0), + (4, 300.00, 1), + (5, 250.50, 0), + (6, 175.25, 1); + +SELECT + sumIf(amount, is_successful = 1) as total_successful_sales +FROM sales; +``` + +The `sumIf` function will sum only the amounts where `is_successful = 1`. +In this case, it will sum: 100.50 + 200.75 + 300.00 + 175.25. + +```response title="Response" + ┌─total_successful_sales─┐ +1. │ 776.50 │ + └───────────────────────┘ +``` + +### Calculate trading volume by price direction {#calculate-trading-vol-price-direction} + +In this example we'll use the `stock` table available at [ClickHouse playground](https://sql.clickhouse.com/) +to calculate trading volume by price direction in the first half of the year 2002. + +```sql title="Query" +SELECT + toStartOfMonth(date) AS month, + formatReadableQuantity(sumIf(volume, price > open)) AS volume_on_up_days, + formatReadableQuantity(sumIf(volume, price < open)) AS volume_on_down_days, + formatReadableQuantity(sumIf(volume, price = open)) AS volume_on_neutral_days, + formatReadableQuantity(sum(volume)) AS total_volume +FROM stock.stock +WHERE date BETWEEN '2002-01-01' AND '2002-12-31' +GROUP BY month +ORDER BY month; +``` + +```markdown + ┌──────month─┬─volume_on_up_days─┬─volume_on_down_days─┬─volume_on_neutral_days─┬─total_volume──┐ + 1. │ 2002-01-01 │ 26.07 billion │ 30.74 billion │ 781.80 million │ 57.59 billion │ + 2. │ 2002-02-01 │ 20.84 billion │ 29.60 billion │ 642.36 million │ 51.09 billion │ + 3. │ 2002-03-01 │ 28.81 billion │ 23.57 billion │ 762.60 million │ 53.14 billion │ + 4. │ 2002-04-01 │ 24.72 billion │ 30.99 billion │ 763.92 million │ 56.47 billion │ + 5. │ 2002-05-01 │ 25.09 billion │ 30.57 billion │ 858.57 million │ 56.52 billion │ + 6. │ 2002-06-01 │ 29.10 billion │ 30.88 billion │ 875.71 million │ 60.86 billion │ + 7. │ 2002-07-01 │ 32.27 billion │ 41.73 billion │ 747.32 million │ 74.75 billion │ + 8. │ 2002-08-01 │ 28.57 billion │ 27.49 billion │ 1.17 billion │ 57.24 billion │ + 9. │ 2002-09-01 │ 23.37 billion │ 31.02 billion │ 775.66 million │ 55.17 billion │ +10. │ 2002-10-01 │ 38.57 billion │ 34.05 billion │ 956.48 million │ 73.57 billion │ +11. │ 2002-11-01 │ 34.90 billion │ 25.47 billion │ 998.34 million │ 61.37 billion │ +12. │ 2002-12-01 │ 22.99 billion │ 28.65 billion │ 1.14 billion │ 52.79 billion │ + └────────────┴───────────────────┴─────────────────────┴────────────────────────┴───────────────┘ +``` + +### Calculate trading volume by stock symbol {#calculate-trading-volume} + +In this example we'll use the `stock` table available at [ClickHouse playground](https://sql.clickhouse.com/) +to calculate trading volume by stock symbol in 2006 for three of the largest tech +companies at the time. + +```sql title="Query" +SELECT + toStartOfMonth(date) AS month, + formatReadableQuantity(sumIf(volume, symbol = 'AAPL')) AS apple_volume, + formatReadableQuantity(sumIf(volume, symbol = 'MSFT')) AS microsoft_volume, + formatReadableQuantity(sumIf(volume, symbol = 'GOOG')) AS google_volume, + sum(volume) AS total_volume, + round(sumIf(volume, symbol IN ('AAPL', 'MSFT', 'GOOG')) / sum(volume) * 100, 2) AS major_tech_percentage +FROM stock.stock +WHERE date BETWEEN '2006-01-01' AND '2006-12-31' +GROUP BY month +ORDER BY month; +``` + +```markdown title="Response" + ┌──────month─┬─apple_volume───┬─microsoft_volume─┬─google_volume──┬─total_volume─┬─major_tech_percentage─┐ + 1. │ 2006-01-01 │ 782.21 million │ 1.39 billion │ 299.69 million │ 84343937700 │ 2.93 │ + 2. │ 2006-02-01 │ 670.38 million │ 1.05 billion │ 297.65 million │ 73524748600 │ 2.74 │ + 3. │ 2006-03-01 │ 744.85 million │ 1.39 billion │ 288.36 million │ 87960830800 │ 2.75 │ + 4. │ 2006-04-01 │ 718.97 million │ 1.45 billion │ 185.65 million │ 78031719800 │ 3.02 │ + 5. │ 2006-05-01 │ 557.89 million │ 2.32 billion │ 174.94 million │ 97096584100 │ 3.14 │ + 6. │ 2006-06-01 │ 641.48 million │ 1.98 billion │ 142.55 million │ 96304086800 │ 2.87 │ + 7. │ 2006-07-01 │ 624.93 million │ 1.33 billion │ 127.74 million │ 79940921800 │ 2.61 │ + 8. │ 2006-08-01 │ 639.35 million │ 1.13 billion │ 107.16 million │ 84251753200 │ 2.23 │ + 9. │ 2006-09-01 │ 633.45 million │ 1.10 billion │ 121.72 million │ 82775234300 │ 2.24 │ +10. │ 2006-10-01 │ 514.82 million │ 1.29 billion │ 158.90 million │ 93406712600 │ 2.1 │ +11. │ 2006-11-01 │ 494.37 million │ 1.24 billion │ 118.49 million │ 90177365500 │ 2.06 │ +12. │ 2006-12-01 │ 603.95 million │ 1.14 billion │ 91.77 million │ 80499584100 │ 2.28 │ + └────────────┴────────────────┴──────────────────┴────────────────┴──────────────┴───────────────────────┘ +``` + +## See also {#see-also} +- [`sum`](/sql-reference/aggregate-functions/reference/sum) +- [`If combinator`](/sql-reference/aggregate-functions/combinators#-if) diff --git a/docs/guides/examples/aggregate_function_combinators/sumMap.md b/docs/guides/examples/aggregate_function_combinators/sumMap.md new file mode 100644 index 00000000000..17829dbb144 --- /dev/null +++ b/docs/guides/examples/aggregate_function_combinators/sumMap.md @@ -0,0 +1,65 @@ +--- +slug: '/examples/aggregate-function-combinators/sumMap' +title: 'sumMap' +description: 'Example of using the sumMap combinator' +keywords: ['sum', 'map', 'combinator', 'examples', 'sumMap'] +sidebar_label: 'sumMap' +--- + +# sumMap {#summap} + +## Description {#description} + +The [`Map`](/sql-reference/aggregate-functions/combinators#-map) combinator can be applied to the [`sum`](/sql-reference/aggregate-functions/reference/sum) +function to calculate the sum of values in a Map according to each key, using the `sumMap` +aggregate combinator function. + +## Example Usage {#example-usage} + +In this example, we'll create a table that stores status codes and their counts for different timeslots, +where each row contains a Map of status codes to their corresponding counts. We'll use +`sumMap` to calculate the total count for each status code within each timeslot. + +```sql title="Query" +CREATE TABLE metrics( + date Date, + timeslot DateTime, + status Map(String, UInt64) +) ENGINE = Log; + +INSERT INTO metrics VALUES + ('2000-01-01', '2000-01-01 00:00:00', (['a', 'b', 'c'], [15, 25, 35])), + ('2000-01-01', '2000-01-01 00:00:00', (['c', 'd', 'e'], [45, 55, 65])), + ('2000-01-01', '2000-01-01 00:01:00', (['d', 'e', 'f'], [75, 85, 95])), + ('2000-01-01', '2000-01-01 00:01:00', (['f', 'g', 'g'], [105, 115, 125])); + +SELECT + timeslot, + sumMap(status), +FROM metrics +GROUP BY timeslot; +``` + +The `sumMap` function will calculate the total count for each status code within each timeslot. For example: +- In timeslot '2000-01-01 00:00:00': + - Status 'a': 15 + - Status 'b': 25 + - Status 'c': 35 + 45 = 80 + - Status 'd': 55 + - Status 'e': 65 +- In timeslot '2000-01-01 00:01:00': + - Status 'd': 75 + - Status 'e': 85 + - Status 'f': 95 + 105 = 200 + - Status 'g': 115 + 125 = 240 + +```response title="Response" + ┌────────────timeslot─┬─sumMap(status)───────────────────────┐ +1. │ 2000-01-01 00:01:00 │ {'d':75,'e':85,'f':200,'g':240} │ +2. │ 2000-01-01 00:00:00 │ {'a':15,'b':25,'c':80,'d':55,'e':65} │ + └─────────────────────┴──────────────────────────────────────┘ +``` + +## See also {#see-also} +- [`sum`](/sql-reference/aggregate-functions/reference/sum) +- [`Map combinator`](/sql-reference/aggregate-functions/combinators#-map) diff --git a/docs/guides/examples/aggregate_function_combinators/sumSimpleState.md b/docs/guides/examples/aggregate_function_combinators/sumSimpleState.md new file mode 100644 index 00000000000..d048462ba85 --- /dev/null +++ b/docs/guides/examples/aggregate_function_combinators/sumSimpleState.md @@ -0,0 +1,106 @@ +--- +slug: '/examples/aggregate-function-combinators/sumSimpleState' +title: 'sumSimpleState' +description: 'Example of using the sumSimpleState combinator' +keywords: ['sum', 'state', 'simple', 'combinator', 'examples', 'sumSimpleState'] +sidebar_label: 'sumSimpleState' +--- + +# sumSimpleState {#sumsimplestate} + +## Description {#description} + +The [`SimpleState`](/sql-reference/aggregate-functions/combinators#-simplestate) combinator can be applied to the [`sum`](/sql-reference/aggregate-functions/reference/sum) +function to return the sum across all input values. It returns the result with +type [`SimpleAggregateFunction`](/docs/sql-reference/data-types/simpleaggregatefunction). + +## Example Usage {#example-usage} + +### Tracking upvotes and downvotes {#tracking-post-votes} + +Let's look at a practical example using a table that tracks votes on posts. +For each post, we want to maintain running totals of upvotes, downvotes, and an +overall score. Using the `SimpleAggregateFunction` type with sum is suited for +this use case as we only need to store the running totals, not the entire state +of the aggregation. As a result, it will be faster and will not require merging +of partial aggregate states. + +First, we create a table for the raw data: + +```sql title="Query" +CREATE TABLE raw_votes +( + post_id UInt32, + vote_type Enum8('upvote' = 1, 'downvote' = -1) +) +ENGINE = MergeTree() +ORDER BY post_id; +``` + +Next, we create a target table which will store the aggregated data: + +```sql +CREATE TABLE vote_aggregates +( + post_id UInt32, + upvotes SimpleAggregateFunction(sum, UInt64), + downvotes SimpleAggregateFunction(sum, UInt64), + score SimpleAggregateFunction(sum, Int64) +) +ENGINE = AggregatingMergeTree() +ORDER BY post_id; +``` + +We then create a Materialized View with `SimpleAggregateFunction` type columns: + +```sql +CREATE MATERIALIZED VIEW mv_vote_processor TO vote_aggregates +AS +SELECT + post_id, + -- Initial value for sum state (1 if upvote, 0 otherwise) + toUInt64(vote_type = 'upvote') AS upvotes, + -- Initial value for sum state (1 if downvote, 0 otherwise) + toUInt64(vote_type = 'downvote') AS downvotes, + -- Initial value for sum state (1 for upvote, -1 for downvote) + toInt64(vote_type) AS score +FROM raw_votes; +``` + +Insert sample data: + +```sql +INSERT INTO raw_votes VALUES + (1, 'upvote'), + (1, 'upvote'), + (1, 'downvote'), + (2, 'upvote'), + (2, 'downvote'), + (3, 'downvote'); +``` + +Query the Materialized View using the `SimpleState` combinator: + +```sql +SELECT + post_id, + sum(upvotes) AS total_upvotes, + sum(downvotes) AS total_downvotes, + sum(score) AS total_score +FROM vote_aggregates -- Query the target table +GROUP BY post_id +ORDER BY post_id ASC; +``` + +```response +┌─post_id─┬─total_upvotes─┬─total_downvotes─┬─total_score─┐ +│ 1 │ 2 │ 1 │ 1 │ +│ 2 │ 1 │ 1 │ 0 │ +│ 3 │ 0 │ 1 │ -1 │ +└─────────┴───────────────┴─────────────────┴─────────────┘ +``` + +## See also {#see-also} +- [`sum`](/sql-reference/aggregate-functions/reference/sum) +- [`SimpleState combinator`](/sql-reference/aggregate-functions/combinators#-simplestate) +- [`SimpleAggregateFunction type`](/sql-reference/data-types/simpleaggregatefunction) diff --git a/docs/guides/examples/aggregate_function_combinators/uniqArray.md b/docs/guides/examples/aggregate_function_combinators/uniqArray.md new file mode 100644 index 00000000000..8d2181e829a --- /dev/null +++ b/docs/guides/examples/aggregate_function_combinators/uniqArray.md @@ -0,0 +1,61 @@ +--- +slug: '/examples/aggregate-function-combinators/uniqArray' +title: 'uniqArray' +description: 'Example of using the uniqArray combinator' +keywords: ['uniq', 'array', 'combinator', 'examples', 'uniqArray'] +sidebar_label: 'uniqArray' +--- + +# uniqArray {#uniqarray} + +## Description {#description} + +The [`Array`](/sql-reference/aggregate-functions/combinators#-array) combinator +can be applied to the [`uniq`](/sql-reference/aggregate-functions/reference/uniq) +function to calculate the approximate number of unique elements across all arrays, +using the `uniqArray` aggregate combinator function. + +The `uniqArray` function is useful when you need to count unique elements across +multiple arrays in a dataset. It's equivalent to using `uniq(arrayJoin())`, where +`arrayJoin` first flattens the arrays and then `uniq` counts the unique elements. + +## Example Usage {#example-usage} + +In this example, we'll use a sample dataset of user interests across different +categories to demonstrate how `uniqArray` works. We'll compare it with +`uniq(arrayJoin())` to show the difference in counting unique elements. + +```sql title="Query" +CREATE TABLE user_interests +( + user_id UInt32, + interests Array(String) +) ENGINE = Memory; + +INSERT INTO user_interests VALUES + (1, ['reading', 'gaming', 'music']), + (2, ['gaming', 'sports', 'music']), + (3, ['reading', 'cooking']); + +SELECT + uniqArray(interests) as unique_interests_total, + uniq(arrayJoin(interests)) as unique_interests_arrayJoin +FROM user_interests; +``` + +The `uniqArray` function counts unique elements across all arrays combined, similar to `uniq(arrayJoin())`. +In this example: +- `uniqArray` returns 5 because there are 5 unique interests across all users: 'reading', 'gaming', 'music', 'sports', 'cooking' +- `uniq(arrayJoin())` also returns 5, showing that both functions count unique elements across all arrays + +```response title="Response" + ┌─unique_interests_total─┬─unique_interests_arrayJoin─┐ +1. │ 5 │ 5 │ + └────────────────────────┴────────────────────────────┘ +``` + +## See also {#see-also} +- [`uniq`](/sql-reference/aggregate-functions/reference/uniq) +- [`arrayJoin`](/sql-reference/functions/array-join) +- [`Array combinator`](/sql-reference/aggregate-functions/combinators#-array) +- [`uniqCombined`](/sql-reference/aggregate-functions/reference/uniqcombined) diff --git a/docs/guides/examples/aggregate_function_combinators/uniqArrayIf.md b/docs/guides/examples/aggregate_function_combinators/uniqArrayIf.md new file mode 100644 index 00000000000..04caee8afc3 --- /dev/null +++ b/docs/guides/examples/aggregate_function_combinators/uniqArrayIf.md @@ -0,0 +1,83 @@ +--- +slug: '/examples/aggregate-function-combinators/uniqArrayIf' +title: 'uniqArrayIf' +description: 'Example of using the uniqArrayIf combinator' +keywords: ['uniq', 'array', 'if', 'combinator', 'examples', 'uniqArrayIf'] +sidebar_label: 'uniqArrayIf' +--- + +# uniqArrayIf {#uniqarrayif} + +## Description {#description} + +The [`Array`](/sql-reference/aggregate-functions/combinators#-array) and [`If`](/sql-reference/aggregate-functions/combinators#-if) combinators can be applied to the [`uniq`](/sql-reference/aggregate-functions/reference/uniq) +function to count the number of unique values in arrays for rows where the +condition is true, using the `uniqArrayIf` aggregate combinator function. + +:::note +-`If` and -`Array` can be combined. However, `Array` must come first, then `If`. +::: + +This is useful when you want to count unique elements in an array based on +specific conditions without having to use `arrayJoin`. + +## Example Usage {#example-usage} + +### Count unique products viewed by segment type and engagement level {#count-unique-products} + +In this example, we'll use a table with user shopping session data to count the +number of unique products viewed by users of a specific user segment and with +an engagement metric of time spent in the session. + +```sql title="Query" +CREATE TABLE user_shopping_sessions +( + session_date Date, + user_segment String, + viewed_products Array(String), + session_duration_minutes Int32 +) ENGINE = Memory; + +INSERT INTO user_shopping_sessions VALUES + ('2024-01-01', 'new_customer', ['smartphone_x', 'headphones_y', 'smartphone_x'], 12), + ('2024-01-01', 'returning', ['laptop_z', 'smartphone_x', 'tablet_a'], 25), + ('2024-01-01', 'new_customer', ['smartwatch_b', 'headphones_y', 'fitness_tracker'], 8), + ('2024-01-02', 'returning', ['laptop_z', 'external_drive', 'laptop_z'], 30), + ('2024-01-02', 'new_customer', ['tablet_a', 'keyboard_c', 'tablet_a'], 15), + ('2024-01-02', 'premium', ['smartphone_x', 'smartwatch_b', 'headphones_y'], 22); + +-- Count unique products viewed by segment type and engagement level +SELECT + session_date, + -- Count unique products viewed in long sessions by new customers + uniqArrayIf(viewed_products, user_segment = 'new_customer' AND session_duration_minutes > 10) AS new_customer_engaged_products, + -- Count unique products viewed by returning customers + uniqArrayIf(viewed_products, user_segment = 'returning') AS returning_customer_products, + -- Count unique products viewed across all sessions + uniqArray(viewed_products) AS total_unique_products +FROM user_shopping_sessions +GROUP BY session_date +ORDER BY session_date +FORMAT Vertical; +``` + +```response title="Response" +Row 1: +────── +session_date: 2024-01-01 +new_customer⋯ed_products: 2 +returning_customer_products: 3 +total_unique_products: 6 + +Row 2: +────── +session_date: 2024-01-02 +new_customer⋯ed_products: 2 +returning_customer_products: 2 +total_unique_products: 7 +``` + +## See also {#see-also} +- [`uniq`](/sql-reference/aggregate-functions/reference/uniq) +- [`Array combinator`](/sql-reference/aggregate-functions/combinators#-array) +- [`If combinator`](/sql-reference/aggregate-functions/combinators#-if) diff --git a/scripts/aspell-ignore/en/aspell-dict.txt b/scripts/aspell-ignore/en/aspell-dict.txt index 83ba4bba4b3..c8c3ba47882 100644 --- a/scripts/aspell-ignore/en/aspell-dict.txt +++ b/scripts/aspell-ignore/en/aspell-dict.txt @@ -3483,3 +3483,33 @@ VersionedCollapsing subpath AICPA restartable +sumArray +sumForEach +argMaxIf +groupArrayResample +downsampled +uniqArrayIf +minSimpleState +sumArray +avgMerge +avgMergeState +timeslot +timeslots +groupArrayDistinct +avgMap +avgState +avgIf +quantilesTiming +quantilesTimingIf +quantilesTimingArrayIf +downvotes +sumSimpleState +upvotes +uniqArray +avgResample +countResample +avgMerge +avgState +argMinIf +minSimpleState +maxSimpleState diff --git a/sidebars.js b/sidebars.js index 232758d2a0e..15ea16cea9c 100644 --- a/sidebars.js +++ b/sidebars.js @@ -581,6 +581,18 @@ const sidebars = { type: "autogenerated", dirName: "sql-reference/aggregate-functions", }, + { + type: "category", + label: "Combinator examples", + collapsed: true, + collapsible: true, + items: [ + { + type: "autogenerated", + dirName: "guides/examples/aggregate_function_combinators", + } + ] + }, ], }, {