diff --git a/README.md b/README.md
index 4de5831..8a98684 100644
--- a/README.md
+++ b/README.md
@@ -110,23 +110,28 @@ benchmark configurations using supported modules.**
## Support Matrix
-### setup / installation
+### Setup / Installation
-| system | local | aws | docker | gcp | azure |
-|------------|-------|---------------|--------|-----|-------|
-| Exasol | ✗ | ✓1 | ✗ | ✗ | ✗ |
-| ClickHouse | ✗ | ✓1 | ✗ | ✗ | ✗ |
+| system | local | aws | docker | gcp | azure |
+|------------|-------|-----|--------|-----|-------|
+| Exasol | ✗ | ✓ | ✗ | ✗ | ✗ |
+| ClickHouse | ✗ | ✓ | ✗ | ✗ | ✗ |
-Notes:
+### Workloads
-1. Only single-node deployments supported at this time.
+
+[clickbench]: benchkit/workloads/clickbench/README.md "ClickHouse ClickBench"
+[estuary]: benchkit/workloads/estuary/README.md "Estuary Warehouse Report"
-### "tpch" workload
+| system | tpch | [clickbench] | [estuary] |
+|------------|------|--------------|-----------------|
+| Exasol | ✓ | ✓ | ✓(1) |
+| ClickHouse | ✓ | ✓ | ✗(1) |
+
+Notes:
+
+1. Work in Progress
-| system | local | aws | docker | gcp | azure |
-|------------|-------|-----|--------|-----|-------|
-| Exasol | ✗ | ✓ | ✗ | ✗ | ✗ |
-| ClickHouse | ✗ | ✓ | ✗ | ✗ | ✗ |
## Documentation
diff --git a/benchkit/common/__init__.py b/benchkit/common/__init__.py
index e69de29..89dc356 100644
--- a/benchkit/common/__init__.py
+++ b/benchkit/common/__init__.py
@@ -0,0 +1,2 @@
+from .file_management import download_file_to_storage
+from .markers import exclude_from_package
diff --git a/benchkit/common/file_management.py b/benchkit/common/file_management.py
new file mode 100644
index 0000000..5d4d07b
--- /dev/null
+++ b/benchkit/common/file_management.py
@@ -0,0 +1,13 @@
+from pathlib import Path
+
+
+def download_file_to_storage(url: str, target: Path) -> None:
+ """Download data from a given URL to given target path"""
+ import requests
+
+ with (
+ requests.get(url, allow_redirects=True, stream=True) as handle,
+ open(target, "wb") as file,
+ ):
+ handle.raise_for_status()
+        # stream to disk in chunks so very large downloads are not held in memory
+        for chunk in handle.iter_content(chunk_size=1 << 20):
+            file.write(chunk)
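For orientation, a minimal usage sketch of the new helper; the URL and target path below are placeholders, not part of this change:

```python
from pathlib import Path

from benchkit.common import download_file_to_storage

# hypothetical source and destination
target = Path("/var/tmp/hits_0.csv")
download_file_to_storage("https://example.com/data/hits_0.csv", target)
print(f"downloaded {target.stat().st_size} bytes to {target}")
```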
diff --git a/benchkit/common/markers.py b/benchkit/common/markers.py
index 48b7859..13ff446 100644
--- a/benchkit/common/markers.py
+++ b/benchkit/common/markers.py
@@ -27,5 +27,5 @@ def exclude_from_package(obj: T) -> T:
... def install(self) -> bool:
... return self._install_database()
"""
- obj._exclude_from_package = True # type: ignore
+ obj._exclude_from_package = True # type: ignore[attr-defined]
return obj
diff --git a/benchkit/package/creator.py b/benchkit/package/creator.py
index 2916cfc..19455ff 100644
--- a/benchkit/package/creator.py
+++ b/benchkit/package/creator.py
@@ -5,17 +5,16 @@
import shutil
import zipfile
from pathlib import Path
-from typing import TYPE_CHECKING, Any, Literal, cast
+from typing import Any, Literal, cast
from jinja2 import Environment, FileSystemLoader
from rich.console import Console
-if TYPE_CHECKING:
- from ..systems.base import SystemUnderTest
- from ..util import ensure_directory
- from .code_minimizer import CodeMinimizer
- from .formatter import PackageFormatter
- from .import_cleaner import ImportCleaner
+from ..util import ensure_directory
+from .code_minimizer import CodeMinimizer
+from .formatter import PackageFormatter
+from .import_cleaner import ImportCleaner
+from ..systems.base import SystemUnderTest
console = Console()
@@ -352,9 +351,6 @@ def _copy_minimal_framework_files(self) -> None:
"run": [
"parsers.py"
], # Only parsers, no __init__.py to avoid import issues
- "package": [
- "markers.py", # imported by systems/base
- ],
"systems": None, # Copy all - needed for database connections
"workloads": None, # Copy all - needed for workload execution
"common": None,
diff --git a/benchkit/run/runner.py b/benchkit/run/runner.py
index 12f361c..60db368 100644
--- a/benchkit/run/runner.py
+++ b/benchkit/run/runner.py
@@ -6,20 +6,25 @@
from collections.abc import Callable
from dataclasses import dataclass, field
from pathlib import Path
-from typing import Any, Literal
+from typing import TYPE_CHECKING, Any, Literal
import pandas as pd
from rich.console import Console
-from ..common.markers import exclude_from_package
+from benchkit.common import exclude_from_package
+from benchkit.util import ensure_directory, load_json, save_json
+
from ..debug import is_debug_enabled
from ..systems import create_system
-from ..systems.base import SystemUnderTest
-from ..util import ensure_directory, load_json, save_json
from ..workloads import create_workload
from .parallel_executor import ParallelExecutor
from .parsers import normalize_runs
+if TYPE_CHECKING:
+ from benchkit.systems import SystemUnderTest
+ from benchkit.workloads import Workload
+
+
console = Console()
@@ -409,7 +414,7 @@ def _execute_phase(
phase: PhaseConfig,
context: ExecutionContext,
force: bool = False,
- workload: Any = None,
+ workload: "Workload | None" = None,
) -> bool | list[dict[str, Any]]:
"""
Universal phase executor handling setup, load, and query phases.
@@ -514,7 +519,7 @@ def _build_phase_tasks(
context: ExecutionContext,
force: bool,
package_path: Path | None,
- workload: Any,
+ workload: "Workload",
) -> dict[str, Callable[[], TaskResult]]:
"""
Build task callables for each system in the benchmark.
@@ -650,7 +655,7 @@ def _get_system_for_context(
self,
system_config: dict[str, Any],
context: ExecutionContext,
- ) -> SystemUnderTest:
+ ) -> "SystemUnderTest":
"""
Create system instance configured for the execution context.
@@ -736,11 +741,11 @@ def _get_system_for_context(
@exclude_from_package
def _setup_operation(
self,
- system: SystemUnderTest,
+ system: "SystemUnderTest",
system_config: dict[str, Any],
instance_manager: Any,
package_path: Path | None, # unused for setup
- workload: Any, # unused for setup
+ workload: "Workload", # unused for setup
) -> tuple[bool, dict[str, Any]]:
"""
Execute setup operation for a single system.
@@ -849,11 +854,11 @@ def _setup_operation(
def _load_operation(
self,
- system: SystemUnderTest,
+ system: "SystemUnderTest",
system_config: dict[str, Any],
instance_manager: Any,
package_path: Path | None,
- workload: Any,
+ workload: "Workload",
) -> tuple[bool, dict[str, Any]]:
"""
Execute load operation for a single system.
@@ -902,11 +907,11 @@ def _load_operation(
def _query_operation(
self,
- system: SystemUnderTest,
+ system: "SystemUnderTest",
system_config: dict[str, Any],
instance_manager: Any,
package_path: Path | None,
- workload: Any,
+ workload: "Workload",
) -> tuple[bool, list[dict[str, Any]]]:
"""
Execute query operation for a single system.
@@ -1044,7 +1049,7 @@ def _create_system_for_local_execution(
system_config: dict[str, Any],
instance_manager: Any,
output_callback: Callable[[str], None] | None = None,
- ) -> SystemUnderTest:
+ ) -> "SystemUnderTest":
"""Create system object configured for local-to-remote execution.
IMPORTANT: Uses the PUBLIC IP from the cloud instance manager to enable
@@ -1483,17 +1488,12 @@ def _prepare_storage_phase(self) -> bool:
return True
def _execute_queries(
- self, system: Any, workload: Any
+ self, system: "SystemUnderTest", workload: "Workload"
) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:
"""Execute benchmark queries with timing and monitoring."""
- query_names = self.config["workload"]["queries"]["include"]
runs_per_query = self.config["workload"]["runs_per_query"]
warmup_runs = self.config["workload"]["warmup_runs"]
- # If no queries specified, use workload's default (all queries)
- if not query_names:
- query_names = workload.queries_to_include
-
# Extract multiuser configuration
multiuser_config = self.config["workload"].get("multiuser") or {}
num_streams = 1
@@ -1508,7 +1508,7 @@ def _execute_queries(
# Execute queries
result_dict = workload.run_workload(
system=system,
- query_names=query_names,
+ query_names=workload.get_included_queries(),
runs_per_query=runs_per_query,
warmup_runs=warmup_runs,
num_streams=num_streams,
@@ -1570,7 +1570,7 @@ def _save_setup_summary(
def _load_setup_summary_to_system(
self,
- system: SystemUnderTest,
+ system: "SystemUnderTest",
system_name: str,
executor: "ParallelExecutor | None" = None,
) -> None:
diff --git a/benchkit/systems/base.py b/benchkit/systems/base.py
index 375758d..2de44ce 100644
--- a/benchkit/systems/base.py
+++ b/benchkit/systems/base.py
@@ -7,13 +7,15 @@
from pathlib import Path
from typing import TYPE_CHECKING, Any, Literal
-from benchkit.common.markers import exclude_from_package
+from benchkit.common import exclude_from_package
+
+from ..util import safe_command
if TYPE_CHECKING:
# avoid cyclic dependency problems
- from ..util import safe_command
from ..workloads import Workload
+
TableOperation = Literal[
"DEFAULT",
"OPTIMIZE TABLE",
@@ -70,6 +72,8 @@ def __init__(
# Workload configuration for dynamic tuning
self.workload_config = workload_config or {}
+ # default schema for queries
+ self.schema: str | None = None
# Command recording for report reproduction
self.setup_commands: list[dict[str, Any]] = []
@@ -421,6 +425,64 @@ def load_data_from_iterable(
"""
pass
+ def load_data_from_url_with_download(
+ self,
+ schema_name: str,
+ table_name: str,
+ data_url: str | list[str],
+ /,
+ extension: str = ".csv",
+ **kwargs: Any,
+ ) -> bool:
+ """
+        Download the given resources to the host filesystem,
+        then load each downloaded file into the database via `load_data()`.
+ """
+ from benchkit.common import download_file_to_storage
+
+ local_storage: Path = self.data_dir or Path("/var/tmp")
+ downloaded_files: list[Path] = []
+ file_part: int = 0
+ ## download files locally
+ url_list: list[str] = [data_url] if isinstance(data_url, str) else data_url
+ for url in url_list:
+ file_name: str = f"{table_name}_{file_part}{extension}"
+ target_path: Path = local_storage / file_name
+ if target_path.exists():
+ self._log(f"Download: reusing existing file {target_path}")
+ downloaded_files.append(target_path)
+ continue
+ self._log(f"Downloading {file_name} from {url}")
+ try:
+ download_file_to_storage(url, target_path)
+ downloaded_files.append(target_path)
+ except Exception as e:
+ self._log(f"Error downloading {file_name}: {e}")
+ return False
+
+ ## then, import files one by one
+ for file in downloaded_files:
+ if not self.load_data(table_name, file, schema=schema_name):
+ return False
+ return True
+
+ def load_data_from_url(
+ self,
+ schema_name: str,
+ table_name: str,
+ data_url: str | list[str],
+ /,
+ extension: str = ".csv",
+ **kwargs: Any,
+ ) -> bool:
+ """
+ Load table data from a URL or a set of URLs.
+ Default implementation downloads data to local storage and then imports the downloaded file(s) using load_data()
+ """
+ return self.load_data_from_url_with_download(
+ schema_name, table_name, data_url, extension=extension, **kwargs
+ )
+
@abstractmethod
def execute_query(
self,
@@ -1774,6 +1836,62 @@ def estimate_execution_time(
"""
return timedelta(minutes=5)
+ # noinspection PyMethodMayBeStatic
+ def split_sql_statements(self, sql: str) -> list[str]:
+ """
+ Split SQL script into individual statements.
+        The method is deliberately not static so systems with a different statement syntax can override it.
+
+ Handles:
+ - Semicolon-separated statements
+ - SQL comments (-- and /* */)
+ - Empty lines
+
+ Returns:
+ List of individual SQL statements
+ """
+ statements = []
+ current_statement = []
+ in_comment = False
+
+ for line in sql.split("\n"):
+ stripped = line.strip()
+
+ # Skip SQL comments
+ if stripped.startswith("--"):
+ continue
+
+ # Handle multi-line comments
+ if "/*" in stripped:
+ in_comment = True
+ if "*/" in stripped:
+ in_comment = False
+ continue
+ if in_comment:
+ continue
+
+ # Skip empty lines
+ if not stripped:
+ continue
+
+ # Check if line ends with semicolon (statement terminator)
+ if stripped.endswith(";"):
+ # Add the line without semicolon to current statement
+ current_statement.append(stripped[:-1])
+ # Join and add to statements list
+ statements.append("\n".join(current_statement))
+ # Reset for next statement
+ current_statement = []
+ else:
+ # Add line to current statement
+ current_statement.append(stripped)
+
+ # Add any remaining statement (for scripts without trailing semicolon)
+ if current_statement:
+ statements.append("\n".join(current_statement))
+
+ return statements
+
def get_system_class(system_kind: str) -> type | None:
"""
diff --git a/benchkit/systems/clickhouse.py b/benchkit/systems/clickhouse.py
index e3ca7e3..488f5d9 100644
--- a/benchkit/systems/clickhouse.py
+++ b/benchkit/systems/clickhouse.py
@@ -15,11 +15,11 @@
from benchkit.common.markers import exclude_from_package
+from ..util import Timer
from .base import SystemUnderTest, TableOperation
if TYPE_CHECKING:
# avoid cyclic dependency problems
- from ..util import Timer
from ..workloads import Workload
@@ -1490,6 +1490,79 @@ def load_data_from_iterable(
) -> bool:
raise NotImplementedError("clickhouse.load_data_from_iterable")
+ def load_data_from_url(
+ self,
+ schema_name: str,
+ table_name: str,
+ data_url: str | list[str],
+ /,
+ extension: str = ".csv",
+ **kwargs: Any,
+ ) -> bool:
+ if isinstance(data_url, list):
+ raise NotImplementedError("Loading multiple URLs into clickhouse")
+
+ try:
+ self._log(f"Loading {data_url} into {table_name}...")
+
+ import_query: str = (
+ f"INSERT INTO {schema_name}.{table_name} SELECT * FROM url('{data_url}', CSV)"
+ )
+
+ if clickhouse_connect:
+            # Use clickhouse-connect for import and verification
+ client = self._get_client()
+ if not client:
+ self._log("Error: failed to connect to clickhouse system")
+ return False
+
+ import_res = client.query(import_query)
+ self._log(f"{import_res}")
+ count_result = client.query(
+ f"SELECT COUNT(*) FROM {schema_name}.{table_name}"
+ )
+ row_count = count_result.result_rows[0][0]
+
+ else:
+ import_cmd = (
+ f"clickhouse-client "
+ f"--user={self.username} "
+ f"--password={self.password} "
+ f"--query='{import_query}'"
+ )
+
+ # Execute the import
+ result = self.execute_command(import_cmd, timeout=3600.0, record=False)
+
+ if not result.get("success", False):
+ self._log(
+ f"Failed to load data: {result.get('stderr', 'Unknown error')}"
+ )
+ return False
+
+ # Verify data was loaded by counting rows
+ count_cmd = (
+ f"clickhouse-client "
+ f"--user={self.username} "
+ f"--password={self.password} "
+ f'--query="SELECT COUNT(*) FROM {schema_name}.{table_name} FORMAT TSV"'
+ )
+ count_result = self.execute_command(
+ count_cmd, timeout=60.0, record=False
+ )
+ if count_result.get("success", False):
+ row_count = int(count_result.get("stdout", "0").strip())
+ else:
+ self._log("Warning: Could not verify row count")
+ return True # Assume success if import succeeded
+
+ self._log(f"Successfully loaded {row_count:,} rows into {table_name}")
+ return True
+
+ except Exception as e:
+ self._log(f"Failed to load data into {table_name}: {e}")
+ return False
+
def execute_query(
self,
query: str,
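For reference, the clickhouse-connect path above boils down to a single `INSERT ... SELECT` from ClickHouse's `url()` table function; a sketch of the rendered statement for the ClickBench source used elsewhere in this change:

```python
# mirrors how import_query is built in load_data_from_url() above
schema_name, table_name = "clickbench", "hits"
data_url = "https://datasets.clickhouse.com/hits_compatible/hits.csv.gz"

import_query = (
    f"INSERT INTO {schema_name}.{table_name} SELECT * FROM url('{data_url}', CSV)"
)
# -> INSERT INTO clickbench.hits
#    SELECT * FROM url('https://datasets.clickhouse.com/hits_compatible/hits.csv.gz', CSV)
```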
diff --git a/benchkit/systems/exasol.py b/benchkit/systems/exasol.py
index 8f7717a..cd46a77 100644
--- a/benchkit/systems/exasol.py
+++ b/benchkit/systems/exasol.py
@@ -11,11 +11,11 @@
from benchkit.common.markers import exclude_from_package
+from ..util import Timer
from .base import SystemUnderTest
if TYPE_CHECKING:
# avoid cyclic dependency problems
- from ..util import Timer
from ..workloads import Workload
@@ -2109,3 +2109,53 @@ def _cleanup_disturbing_services(self, play_id: str) -> bool:
return removed > 0
except Exception:
return False
+
+ def load_data_from_url(
+ self,
+ schema_name: str,
+ table_name: str,
+ data_url: str | list[str],
+ /,
+ extension: str = ".csv",
+ **kwargs: Any,
+ ) -> bool:
+ conn = None
+
+        # split URLs into base locations and file names
+        # note: plain string split; pathlib would collapse the "//" in "https://"
+        data_sources: dict[str, list[str]] = {}
+        for url in [data_url] if isinstance(data_url, str) else data_url:
+            prefix, _, name = url.rpartition("/")
+            data_sources.setdefault(prefix, []).append(name)
+
+ try:
+ conn = self._get_connection()
+ if not conn:
+ return False
+ if not self._schema_created:
+ if self._schema_exists(conn, schema_name):
+ self._schema_created = True
+ conn.execute(f"OPEN SCHEMA {schema_name}")
+
+ self._log(f"Loading {data_url} into {table_name}...")
+            base_sql = f"IMPORT INTO {schema_name}.{table_name} FROM CSV"
+            for host, files in data_sources.items():
+                base_sql += f" AT '{host}' " + " ".join([f"FILE '{f}'" for f in files])
+
+ conn.execute(base_sql)
+
+ # Verify data was loaded
+ result = conn.execute(f"SELECT COUNT(*) FROM {table_name}")
+ row_count = result.fetchone()[0]
+ self._log(f"Successfully loaded {row_count:,} rows into {table_name}")
+ return True
+
+ except Exception as e:
+ self._log(f"Failed to load data into {table_name}: {e}")
+ return False
+ finally:
+ if conn:
+ conn.close()
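A sketch of the grouping performed above: URLs sharing a base location are collected into one `AT ... FILE ...` source of the generated `IMPORT` statement (the URLs and target table here are placeholders):

```python
urls = [
    "https://example.com/part1/orders_0.csv",
    "https://example.com/part1/orders_1.csv",
    "https://example.com/part2/orders_2.csv",
]

data_sources: dict[str, list[str]] = {}
for url in urls:
    prefix, _, name = url.rpartition("/")
    data_sources.setdefault(prefix, []).append(name)

sql = "IMPORT INTO tpch.orders FROM CSV"
for host, files in data_sources.items():
    sql += f" AT '{host}' " + " ".join(f"FILE '{f}'" for f in files)
# -> IMPORT INTO tpch.orders FROM CSV
#      AT 'https://example.com/part1' FILE 'orders_0.csv' FILE 'orders_1.csv'
#      AT 'https://example.com/part2' FILE 'orders_2.csv'
```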
diff --git a/benchkit/workloads/__init__.py b/benchkit/workloads/__init__.py
index d5533a6..605dad3 100644
--- a/benchkit/workloads/__init__.py
+++ b/benchkit/workloads/__init__.py
@@ -1,13 +1,15 @@
"""Benchmark workloads."""
from .base import Workload
+from .clickbench import Clickbench
from .estuary import Estuary
from .tpch import TPCH
# Workload factory mapping
-WORKLOAD_IMPLEMENTATIONS = {
+WORKLOAD_IMPLEMENTATIONS: dict[str, type[Workload]] = {
"tpch": TPCH,
"estuary": Estuary,
+ "clickbench": Clickbench,
}
@@ -32,4 +34,11 @@ def create_workload(config: dict) -> Workload:
return WORKLOAD_IMPLEMENTATIONS[name](config)
-__all__ = ["Workload", "TPCH", "Estuary", "create_workload", "WORKLOAD_IMPLEMENTATIONS"]
+__all__ = [
+ "Workload",
+ "TPCH",
+ "Estuary",
+ "Clickbench",
+ "create_workload",
+ "WORKLOAD_IMPLEMENTATIONS",
+]
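With the factory mapping extended above, the new workload can be created from a plain config dict; a minimal sketch (only `name` is needed here, the remaining keys fall back to defaults):

```python
from benchkit.workloads import create_workload

workload = create_workload({"name": "clickbench", "scale_factor": 1})
print(workload.display_name())             # ClickBench
print(workload.get_schema_name())          # clickbench
print(workload.get_all_query_names()[:3])  # ['Q0', 'Q1', 'Q2']
```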
diff --git a/benchkit/workloads/base.py b/benchkit/workloads/base.py
index d298a21..a2eb9bc 100644
--- a/benchkit/workloads/base.py
+++ b/benchkit/workloads/base.py
@@ -5,9 +5,12 @@
from abc import ABC, abstractmethod
from collections.abc import Callable
from concurrent.futures import ThreadPoolExecutor, as_completed
+from datetime import timedelta
from pathlib import Path
from typing import Any
+from jinja2 import Environment, FileSystemLoader
+
from ..systems.base import SystemUnderTest
@@ -19,6 +22,40 @@ def __init__(self, config: dict[str, Any]):
self.scale_factor = config.get("scale_factor", 1)
self.config = config
self.data_dir = Path(f"data/{self.name}/sf{self.scale_factor}")
+ self.workload_dir = (
+ Path(__file__).parent.parent.parent / "workloads" / self.name
+ )
+ self.template_env: Environment | None = None
+ # Store system for template resolution
+ self._current_system: SystemUnderTest | None = None
+
+ self.data_format = config.get("data_format", "tbl") # TPC-H standard format
+ self.variant = config.get("variant", "official") # Query variant to use
+ self.generator: str = config.get("generator", "")
+
+ self.system_variants = (
+ config.get("system_variants") or {}
+ ) # Per-system variant overrides
+
+ # Determine which queries to include based on include/exclude logic
+ queries_config = config.get("queries", {})
+ self.queries_include = queries_config.get("include", [])
+ self.queries_exclude = queries_config.get("exclude", [])
+
+ def get_template_env(self) -> Environment:
+ """Get the workload's jinja2 template environment"""
+ if not self.template_env:
+ self.template_env = Environment(
+ loader=FileSystemLoader(
+ [
+ self.workload_dir / "queries",
+ self.workload_dir / "setup",
+ ]
+ ),
+ trim_blocks=True,
+ lstrip_blocks=True,
+ )
+ return self.template_env
def display_name(self) -> str:
"""Return user-friendly display name for workload"""
@@ -56,7 +93,6 @@ def generate_data(self, output_dir: Path) -> bool:
"""
pass
- @abstractmethod
def create_schema(self, system: SystemUnderTest) -> bool:
"""
Create database schema and tables for the workload.
@@ -67,7 +103,20 @@ def create_schema(self, system: SystemUnderTest) -> bool:
Returns:
True if schema creation successful, False otherwise
"""
- pass
+ # Use workload specific schema name like "tpch_sf100"
+ schema: str = self.get_schema_name()
+
+ # First, create the schema using the system's method
+ if hasattr(system, "create_schema"):
+ print(f"Creating schema '{schema}'...")
+ if not system.create_schema(schema):
+ print(f"Failed to create schema '{schema}'")
+ return False
+ print(f"✓ Schema '{schema}' created successfully")
+
+ # Then create the tables using the templated script
+ print(f"Creating tables for {self.name}") # TODO: display_name()
+ return self.execute_setup_script(system, "create_tables.sql")
@abstractmethod
def load_data(self, system: SystemUnderTest) -> bool:
@@ -82,7 +131,6 @@ def load_data(self, system: SystemUnderTest) -> bool:
"""
pass
- @abstractmethod
def get_queries(self, system: SystemUnderTest | None = None) -> dict[str, str]:
"""
Get the benchmark queries.
@@ -93,7 +141,26 @@ def get_queries(self, system: SystemUnderTest | None = None) -> dict[str, str]:
Returns:
Dictionary mapping query names to SQL text
"""
- pass
+ # Use provided system or stored system
+ target_system = system or self._current_system
+ if target_system is None:
+ raise ValueError(
+ "System must be provided either as parameter or stored from previous call"
+ )
+
+ # Store system for future template resolution
+ self._current_system = target_system
+
+ # Get and log the variant being used for this system
+ variant = self._get_query_variant_for_system(target_system)
+ if variant != "official":
+ print(f"Loading '{variant}' variant queries for {target_system.kind}")
+
+ queries = {}
+ for query_name in self.get_included_queries():
+ queries[query_name] = self._get_query_sql(query_name, target_system)
+
+ return queries
@abstractmethod
def get_all_query_names(self) -> list[str]:
@@ -105,7 +172,19 @@ def get_all_query_names(self) -> list[str]:
"""
pass
- @abstractmethod
+ def get_included_queries(self) -> list[str]:
+ all_queries = self.get_all_query_names()
+
+ if self.queries_include:
+ # If include is specified, use only those queries
+ return [q for q in all_queries if q in self.queries_include]
+ elif self.queries_exclude:
+ # If exclude is specified, use all queries except excluded ones
+ return [q for q in all_queries if q not in self.queries_exclude]
+ else:
+ # If neither is specified, use all queries
+ return all_queries
+
def run_query(
self, system: SystemUnderTest, query_name: str, query_sql: str
) -> dict[str, Any]:
@@ -120,7 +199,11 @@ def run_query(
Returns:
Query execution result dictionary
"""
- pass
+ # Substitute schema name in query
+ schema_name = self.get_schema_name()
+ formatted_sql = query_sql.format(schema=schema_name)
+
+ return system.execute_query(formatted_sql, query_name=query_name)
def prepare(self, system: SystemUnderTest) -> bool:
"""
@@ -244,7 +327,7 @@ def _run_workload_sequential(
Returns:
Dictionary with 'measured' and 'warmup' keys containing results
"""
- all_queries = self.get_queries()
+ all_queries = self.get_queries(system)
measured_results = []
warmup_results = []
@@ -317,7 +400,7 @@ def _run_workload_multiuser(
Returns:
Dictionary with 'measured' and 'warmup' keys containing results
"""
- all_queries = self.get_queries()
+ all_queries = self.get_queries(system)
warmup_results = []
print(f"Running multiuser workload with {num_streams} concurrent streams")
@@ -603,6 +686,138 @@ def get_workload_info(self) -> dict[str, Any]:
"schema_name": self.get_schema_name(),
}
+ def execute_setup_script(self, system: SystemUnderTest, script_name: str) -> bool:
+ """Execute a templated setup script by rendering the jinja2 template and splitting into individual statements."""
+ try:
+ # Get system extra config for conditional features
+ system_extra = {}
+ if hasattr(system, "setup_config"):
+ system_extra = system.setup_config.get("extra", {})
+
+ # Load and render the template
+ template = self.get_template_env().get_template(script_name)
+
+ # Get node_count and cluster for multinode support
+ node_count = getattr(system, "node_count", 1)
+ cluster = getattr(system, "cluster_name", "benchmark_cluster")
+
+ rendered_sql = template.render(
+ system_kind=system.kind,
+ scale_factor=self.scale_factor,
+ schema=self.get_schema_name(),
+ system_extra=system_extra,
+ node_count=node_count,
+ cluster=cluster,
+ )
+
+ # Split SQL into individual statements and execute them one by one
+ statements = system.split_sql_statements(rendered_sql)
+
+ for idx, statement in enumerate(statements):
+ # Skip empty statements
+ if not statement.strip():
+ continue
+
+ # Calculate dynamic timeout for OPTIMIZE operations
+ # These can take a long time for large tables
+ timeout = self.calculate_statement_timeout(statement, system)
+
+ # Execute each statement individually with calculated timeout
+ result = system.execute_query(
+ statement,
+ query_name=f"setup_{script_name.replace('.sql', '')}_{idx+1}",
+ timeout=int(timeout.total_seconds()),
+ )
+
+ if not result["success"]:
+ print(
+ f"Failed to execute statement {idx+1} in {script_name}: {result.get('error', 'Unknown error')}"
+ )
+ print(f"Statement was: {statement[:200]}...")
+ return False
+
+ return True
+
+ except Exception as e:
+ print(f"Error executing setup script {script_name}: {e}")
+ return False
+
+ def _get_query_variant_for_system(self, system: SystemUnderTest) -> str:
+ """
+ Determine which query variant to use for a given system.
+
+ Args:
+ system: System under test
+
+ Returns:
+ Variant name to use for this system
+ """
+ # Check if system has a specific variant override
+ if self.system_variants and system.name in self.system_variants:
+ return str(self.system_variants[system.name])
+ # Otherwise use global variant
+ return str(self.variant)
+
+ def _get_query_sql(self, query_name: str, system: SystemUnderTest) -> str:
+ """
+ Get SQL text for a specific TPC-H query with variant and templates resolved.
+
+ Priority order for loading queries:
+ 1. variants/{variant}/{system_kind}/{query_name}.sql (system-specific variant)
+ 2. variants/{variant}/{query_name}.sql (generic variant)
+ 3. {query_name}.sql (default/official with inline conditionals)
+ """
+ try:
+ variant = self._get_query_variant_for_system(system)
+
+ # Build priority-ordered list of query paths
+ query_paths = [
+ f"variants/{variant}/{system.kind}/{query_name}.sql",
+ f"variants/{variant}/{query_name}.sql",
+ f"{query_name}.sql",
+ ]
+
+ template = None
+
+ # Try each path in order until one succeeds
+ for path in query_paths:
+ try:
+ template = self.get_template_env().get_template(path)
+ break
+ except Exception:
+ continue
+
+ if template is None:
+ raise FileNotFoundError(
+ f"Query {query_name} not found in any variant path"
+ )
+
+ # Get system extra config for conditional features
+ system_extra = {}
+ if hasattr(system, "setup_config"):
+ system_extra = system.setup_config.get("extra", {})
+
+ # Render template with variant context
+ rendered_sql = template.render(
+ system_kind=system.kind,
+ scale_factor=self.scale_factor,
+ schema=self.get_schema_name(),
+ variant=variant,
+ system_extra=system_extra,
+ )
+
+ return rendered_sql
+
+ except Exception as e:
+ print(f"Error loading query {query_name}: {e}")
+ return f"-- Error loading query {query_name}: {e}"
+
+ def calculate_statement_timeout(
+ self, statement: str, system: SystemUnderTest
+ ) -> timedelta:
+ """Default implementation: 5 minutes for any statement"""
+ return timedelta(minutes=5)
+
def get_workload_class(workload_name: str) -> type | None:
"""
diff --git a/benchkit/workloads/clickbench/README.md b/benchkit/workloads/clickbench/README.md
new file mode 100644
index 0000000..19eff61
--- /dev/null
+++ b/benchkit/workloads/clickbench/README.md
@@ -0,0 +1,8 @@
+# ClickBench
+
+## Abstract
+
+ClickBench is a benchmark developed by ClickHouse, hosted at [GitHub](https://github.com/ClickHouse/ClickBench),
+including an online result browser at [https://benchmark.clickhouse.com](https://benchmark.clickhouse.com).
+
+The benchmark comprises 43 queries of varying complexity against a single table of nearly 100 million rows.
diff --git a/benchkit/workloads/clickbench/__init__.py b/benchkit/workloads/clickbench/__init__.py
new file mode 100644
index 0000000..edacbdf
--- /dev/null
+++ b/benchkit/workloads/clickbench/__init__.py
@@ -0,0 +1 @@
+from .clickbench import Clickbench
diff --git a/benchkit/workloads/clickbench/clickbench.py b/benchkit/workloads/clickbench/clickbench.py
new file mode 100644
index 0000000..6c3060c
--- /dev/null
+++ b/benchkit/workloads/clickbench/clickbench.py
@@ -0,0 +1,46 @@
+from pathlib import Path
+from typing import Any
+
+from benchkit.systems import SystemUnderTest
+from benchkit.workloads import Workload
+
+
+class Clickbench(Workload):
+
+ CSV_SOURCE_URL: str = "https://datasets.clickhouse.com/hits_compatible/hits.csv.gz"
+
+ def __init__(self, config: dict[str, Any]):
+ super().__init__(config)
+ # override default from base
+ self.data_format = config.get("data_format", "csv")
+
+ def display_name(self) -> str:
+ # no scale factor
+ return "ClickBench"
+
+ def generate_data(self, output_dir: Path) -> bool:
+ """TODO: download (and decompress?) data from web server"""
+ if self.generator == "download":
+            # straightforward to implement; the building blocks exist in common.file_management and systems.base
+ raise NotImplementedError("downloading data before load")
+ return True
+
+ def load_data(self, system: SystemUnderTest) -> bool:
+ """Load data into the single benchmark table"""
+ system.schema = self.get_schema_name()
+ return system.load_data_from_url(
+ self.get_schema_name(), "hits", self.CSV_SOURCE_URL
+ )
+
+ def get_all_query_names(self) -> list[str]:
+ # uppercase Q, and query number with no leading zeroes
+ return [f"Q{n}" for n in range(43)]
+
+ def get_schema_name(self) -> str:
+ return "clickbench"
+
+ def estimate_filesystem_usage_gb(self, system: SystemUnderTest) -> int:
+ if system.SUPPORTS_STREAMLOAD:
+ return 0
+ # if system.SUPPORTS_LOAD_FROM_COMPRESSED:
+ return 90
diff --git a/workloads/estuary/README.md b/benchkit/workloads/estuary/README.md
similarity index 98%
rename from workloads/estuary/README.md
rename to benchkit/workloads/estuary/README.md
index 8a8c8a7..439d8fb 100644
--- a/workloads/estuary/README.md
+++ b/benchkit/workloads/estuary/README.md
@@ -33,7 +33,7 @@ This is a benchmark using the TPC-H schema and data, but different queries.
| Microsoft Fabric | DW3000c | ? | 0.99 | $340 | 343 min |
| Microsoft Fabric | DW1500c | ? | 0.48 | $290 | 604 min |
| Microsoft Fabric | DW500c | ? | x | $236 | - |
-| BigQuery | serveless | ? | 15.64 | $241 | 15 min |
+| BigQuery | serverless | ? | 15.64 | $241 | 15 min |
Notes:
diff --git a/benchkit/workloads/estuary/estuary.py b/benchkit/workloads/estuary/estuary.py
index 3dbcc65..f5d5672 100644
--- a/benchkit/workloads/estuary/estuary.py
+++ b/benchkit/workloads/estuary/estuary.py
@@ -1,16 +1,11 @@
from pathlib import Path
from typing import Any
-from jinja2 import Environment, FileSystemLoader
-
from benchkit.systems import SystemUnderTest
-from benchkit.workloads.tpch import TPCH
-
+from benchkit.workloads import Workload
-## TODO -- refactoring #14
-class Estuary(TPCH):
- """Inherits all but very few methods from the TPC-H benchmark"""
+class Estuary(Workload):
@classmethod
def get_python_dependencies(cls) -> list[str]:
"""Return Python packages required for Estuary workload."""
@@ -19,21 +14,10 @@ def get_python_dependencies(cls) -> list[str]:
def __init__(self, config: dict[str, Any]):
super().__init__(config)
- # Override workload folders
- self.workload_dir = (
- Path(__file__).parent.parent.parent / "workloads" / "estuary"
- )
- self.template_env = Environment(
- loader=FileSystemLoader(
- [self.workload_dir / "queries", self.workload_dir / "setup"]
- ),
- trim_blocks=True,
- lstrip_blocks=True,
- )
-
+ # arbitrary restriction introduced by the row calculation in load_data and the broken Faker setup
assert (
- 1 <= self.scale_factor <= 1000
- ), "estuary benchmark only supports scale factors 1 to 1000"
+ 1 <= self.scale_factor <= 100
+ ), "estuary benchmark currently only supports scale factors 1 to 100"
def get_workload_description(self) -> dict[str, Any]:
"""Return Estuary workload description."""
@@ -119,7 +103,20 @@ def estimate_filesystem_usage_gb(self, system: SystemUnderTest) -> int:
"""
Estuary workload uses streaming import where possible,
in which case it does not require local storage.
+ Otherwise, same code as TPC-H (although not 100% correct, as Estuary loads table-by-table)
"""
if system.SUPPORTS_STREAMLOAD:
return 0
- return super().estimate_filesystem_usage_gb(system)
+
+ def scale_multiplier(sf: float) -> float:
+ # 2.0 at very small sf (≈1–10), ~1.6 at 30, →1.3 for sf ≥ 100
+ # f(sf) = 1.3 + 0.7 / (1 + (sf/K)^p), with K≈26.8537, p≈2.5966
+ if sf <= 10:
+ return 2.0
+ val = 1.3 + 0.7 / (1.0 + (sf / 26.853725639548) ** 2.5965770266157073)
+ return float(max(1.3, min(val, 2.0)))
+
+ def estimate_gb(sf: float) -> int:
+ return int(max(sf * scale_multiplier(sf), 3.0))
+
+ return estimate_gb(float(self.scale_factor))
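The helper above yields roughly 3 GB at scale factor 1 (the floor), 20 GB at 10, about 48 GB at 30, and about 132 GB at 100; a standalone sketch mirroring the nested functions for quick inspection:

```python
def estimate_gb(sf: float) -> int:
    # same closed form as the nested helpers in estimate_filesystem_usage_gb()
    if sf <= 10:
        mult = 2.0
    else:
        val = 1.3 + 0.7 / (1.0 + (sf / 26.853725639548) ** 2.5965770266157073)
        mult = max(1.3, min(val, 2.0))
    return int(max(sf * mult, 3.0))

for sf in (1, 10, 30, 100):
    print(f"sf {sf:>3}: ~{estimate_gb(sf)} GB of local staging space")
```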
diff --git a/benchkit/workloads/tpch.py b/benchkit/workloads/tpch.py
index d7d108e..a97ab95 100644
--- a/benchkit/workloads/tpch.py
+++ b/benchkit/workloads/tpch.py
@@ -6,8 +6,7 @@
from pathlib import Path
from typing import TYPE_CHECKING, Any
-from jinja2 import Environment, FileSystemLoader
-
+from ..systems.base import SystemUnderTest
from ..util import safe_command
from .base import Workload
@@ -26,43 +25,6 @@ def get_python_dependencies(cls) -> list[str]:
def __init__(self, config: dict[str, Any]):
super().__init__(config)
- self.data_format = config.get("data_format", "tbl") # TPC-H standard format
- self.variant = config.get("variant", "official") # Query variant to use
- self.system_variants = (
- config.get("system_variants") or {}
- ) # Per-system variant overrides
-
- # Determine which queries to include based on include/exclude logic
- queries_config = config.get("queries", {})
- queries_include = queries_config.get("include", [])
- queries_exclude = queries_config.get("exclude", [])
-
- all_queries = list(self.get_all_query_names())
-
- if queries_include:
- # If include is specified, use only those queries
- self.queries_to_include = queries_include
- elif queries_exclude:
- # If exclude is specified, use all queries except excluded ones
- self.queries_to_include = [
- q for q in all_queries if q not in queries_exclude
- ]
- else:
- # If neither is specified, use all queries
- self.queries_to_include = all_queries
-
- # Setup template environment for TPC-H workload
- self.workload_dir = Path(__file__).parent.parent.parent / "workloads" / "tpch"
- self.template_env = Environment(
- loader=FileSystemLoader(
- [self.workload_dir / "queries", self.workload_dir / "setup"]
- ),
- trim_blocks=True,
- lstrip_blocks=True,
- )
-
- # Store system for template resolution
- self._current_system: SystemUnderTest | None = None
def generate_data(self, output_dir: Path) -> bool:
"""Generate TPC-H data using tpchgen-cli (modern Python approach)."""
@@ -117,111 +79,20 @@ def _generate_with_tpchgen_cli(self, output_dir: Path) -> bool:
print(f"Data generation failed: {e}")
return False
- def create_schema(self, system: SystemUnderTest) -> bool:
- """Create TPC-H schema and tables using templated setup scripts."""
- # Use TPC-H specific schema name (e.g., tpch_sf100)
- schema = self.get_schema_name()
-
- # First, create the schema using the system's method
- if hasattr(system, "create_schema"):
- print(f"Creating schema '{schema}'...")
- if not system.create_schema(schema):
- print(f"Failed to create schema '{schema}'")
- return False
- print(f"✓ Schema '{schema}' created successfully")
-
- # Then create the tables using the templated script
- print("Creating TPC-H tables...")
- return self._execute_setup_script(system, "create_tables.sql")
-
def create_indexes(self, system: SystemUnderTest) -> bool:
"""Create TPC-H indexes using templated setup scripts."""
- return self._execute_setup_script(system, "create_indexes.sql")
+ return self.execute_setup_script(system, "create_indexes.sql")
def analyze_tables(self, system: SystemUnderTest) -> bool:
"""Analyze TPC-H tables using templated setup scripts."""
- return self._execute_setup_script(system, "analyze_tables.sql")
-
- def _execute_setup_script(self, system: SystemUnderTest, script_name: str) -> bool:
- """Execute a templated setup script by splitting into individual statements."""
- try:
- # Get system extra config for conditional features
- system_extra = {}
- if hasattr(system, "setup_config"):
- system_extra = system.setup_config.get("extra", {})
-
- # Load and render the template
- template = self.template_env.get_template(script_name)
-
- # Get node_count and cluster for multinode support
- node_count = getattr(system, "node_count", 1)
- cluster = getattr(system, "cluster_name", "benchmark_cluster")
-
- rendered_sql = template.render(
- system_kind=system.kind,
- scale_factor=self.scale_factor,
- schema=self.get_schema_name(),
- system_extra=system_extra,
- node_count=node_count,
- cluster=cluster,
- )
-
- # Split SQL into individual statements and execute them one by one
- statements = self._split_sql_statements(rendered_sql)
-
- for idx, statement in enumerate(statements):
- # Skip empty statements
- if not statement.strip():
- continue
-
- # Calculate dynamic timeout for OPTIMIZE operations
- # These can take a long time for large tables
- timeout: int = int(
- self._calculate_statement_timeout(statement, system).total_seconds()
- )
-
- # Execute each statement individually with calculated timeout
- result = system.execute_query(
- statement,
- query_name=f"setup_{script_name.replace('.sql', '')}_{idx+1}",
- timeout=timeout,
- )
-
- if not result["success"]:
- print(
- f"Failed to execute statement {idx+1} in {script_name}: {result.get('error', 'Unknown error')}"
- )
- print(f"Statement was: {statement[:200]}...")
- return False
+ return self.execute_setup_script(system, "analyze_tables.sql")
- return True
-
- except Exception as e:
- print(f"Error executing setup script {script_name}: {e}")
- return False
-
- def _calculate_statement_timeout(
+ def calculate_statement_timeout(
self, statement: str, system: SystemUnderTest
) -> timedelta:
- """
- Calculate dynamic timeout for a SQL statement based on scale factor and node count.
-
- For OPTIMIZE TABLE operations on large datasets, the default timeout is often
- insufficient. This method calculates appropriate timeouts based on
- - expected table/data size
- - the operation
- - the system executing the operation (kind and node count)
-
- Args:
- statement: SQL statement to execute
- system: System under test, with its own characteristics
-
- Returns:
- Recommended statement timeout
- """
-
# Check if this is an OPTIMIZE operation (ClickHouse specific)
statement_upper = statement.upper().strip()
+
if "OPTIMIZE TABLE" in statement_upper:
# Base timeout for OPTIMIZE: 10 minutes per SF1
# Scale factor 100 = ~1000 minutes base for lineitem
@@ -258,61 +129,6 @@ def _calculate_statement_timeout(
return system.estimate_execution_time("DEFAULT", self.scale_factor)
- # note: method is not not static in case systems use different split semantics
- def _split_sql_statements(self, sql: str) -> list[str]:
- """
- Split SQL script into individual statements.
-
- Handles:
- - Semicolon-separated statements
- - SQL comments (-- and /* */)
- - Empty lines
-
- Returns:
- List of individual SQL statements
- """
- statements = []
- current_statement = []
- in_comment = False
-
- for line in sql.split("\n"):
- stripped = line.strip()
-
- # Skip SQL comments
- if stripped.startswith("--"):
- continue
-
- # Handle multi-line comments
- if "/*" in stripped:
- in_comment = True
- if "*/" in stripped:
- in_comment = False
- continue
- if in_comment:
- continue
-
- # Skip empty lines
- if not stripped:
- continue
-
- # Check if line ends with semicolon (statement terminator)
- if stripped.endswith(";"):
- # Add the line without semicolon to current statement
- current_statement.append(stripped[:-1])
- # Join and add to statements list
- statements.append("\n".join(current_statement))
- # Reset for next statement
- current_statement = []
- else:
- # Add line to current statement
- current_statement.append(stripped)
-
- # Add any remaining statement (for scripts without trailing semicolon)
- if current_statement:
- statements.append("\n".join(current_statement))
-
- return statements
-
def prepare(self, system: SystemUnderTest) -> bool:
"""Complete TPC-H setup process: generate data, create tables, load data, create indexes, analyze tables."""
print("Setting up TPC-H workload...")
@@ -398,30 +214,6 @@ def load_data(self, system: SystemUnderTest) -> bool:
return True
- def get_queries(self, system: SystemUnderTest | None = None) -> dict[str, str]:
- """Get TPC-H queries with templates and variants resolved for the target system."""
- # Use provided system or stored system
- target_system = system or self._current_system
- if target_system is None:
- raise ValueError(
- "System must be provided either as parameter or stored from previous call"
- )
-
- # Store system for future template resolution
- self._current_system = target_system
-
- # Get and log the variant being used for this system
- variant = self._get_query_variant_for_system(target_system)
- if variant != "official":
- print(f"Loading '{variant}' variant queries for {target_system.kind}")
-
- queries = {}
- for query_name in self.get_all_query_names():
- if query_name in self.queries_to_include:
- queries[query_name] = self._get_query_sql(query_name, target_system)
-
- return queries
-
def run_workload(
self,
system: SystemUnderTest,
@@ -459,16 +251,6 @@ def run_workload(
return result_dict
- def run_query(
- self, system: SystemUnderTest, query_name: str, query_sql: str
- ) -> dict[str, Any]:
- """Execute a TPC-H query."""
- # Substitute schema name in query
- schema_name = self.get_schema_name()
- formatted_sql = query_sql.format(schema=schema_name)
-
- return system.execute_query(formatted_sql, query_name=query_name)
-
def get_workload_description(self) -> dict[str, Any]:
"""Return TPC-H workload description."""
return {
@@ -640,7 +422,7 @@ def get_rendered_setup_scripts(self, system: SystemUnderTest) -> dict[str, str]:
for script_name in ["create_tables", "create_indexes", "analyze_tables"]:
try:
- template = self.template_env.get_template(f"{script_name}.sql")
+ template = self.get_template_env().get_template(f"{script_name}.sql")
system_extra = {}
if hasattr(system, "setup_config"):
system_extra = system.setup_config.get("extra", {})
@@ -668,78 +450,6 @@ def get_all_query_names(self) -> list[str]:
"""Get list of all TPC-H query names."""
return [f"Q{i:02d}" for i in range(1, 23)] # Q01 through Q22
- # Note: Table DDLs are now handled by templated setup scripts
-
- def _get_query_variant_for_system(self, system: SystemUnderTest) -> str:
- """
- Determine which query variant to use for a given system.
-
- Args:
- system: System under test
-
- Returns:
- Variant name to use for this system
- """
- # Check if system has a specific variant override
- if self.system_variants and system.name in self.system_variants:
- return str(self.system_variants[system.name])
- # Otherwise use global variant
- return str(self.variant)
-
- def _get_query_sql(self, query_name: str, system: SystemUnderTest) -> str:
- """
- Get SQL text for a specific TPC-H query with variant and templates resolved.
-
- Priority order for loading queries:
- 1. variants/{variant}/{system_kind}/{query_name}.sql (system-specific variant)
- 2. variants/{variant}/{query_name}.sql (generic variant)
- 3. {query_name}.sql (default/official with inline conditionals)
- """
- try:
- variant = self._get_query_variant_for_system(system)
-
- # Build priority-ordered list of query paths
- query_paths = [
- f"variants/{variant}/{system.kind}/{query_name}.sql",
- f"variants/{variant}/{query_name}.sql",
- f"{query_name}.sql",
- ]
-
- template = None
-
- # Try each path in order until one succeeds
- for path in query_paths:
- try:
- template = self.template_env.get_template(path)
- break
- except Exception:
- continue
-
- if template is None:
- raise FileNotFoundError(
- f"Query {query_name} not found in any variant path"
- )
-
- # Get system extra config for conditional features
- system_extra = {}
- if hasattr(system, "setup_config"):
- system_extra = system.setup_config.get("extra", {})
-
- # Render template with variant context
- rendered_sql = template.render(
- system_kind=system.kind,
- scale_factor=self.scale_factor,
- schema=self.get_schema_name(),
- variant=variant,
- system_extra=system_extra,
- )
-
- return rendered_sql
-
- except Exception as e:
- print(f"Error loading query {query_name}: {e}")
- return f"-- Error loading query {query_name}: {e}"
-
def get_schema_name(self) -> str:
"""Get the schema name for TPC-H workload."""
# ClickHouse uses 'database', Exasol uses 'schema'
diff --git a/configs/clickbench_exa_vs_ch.yaml b/configs/clickbench_exa_vs_ch.yaml
new file mode 100644
index 0000000..9e81cd4
--- /dev/null
+++ b/configs/clickbench_exa_vs_ch.yaml
@@ -0,0 +1,82 @@
+title: "ClickBench comparison for Exasol vs ClickHouse, single user"
+author: "Stefan Reich, Principal Database Consultant at Exasol AG"
+
+# Execution configuration (optional)
+execution:
+ parallel: true # Enable parallel execution of systems (default: false)
+ max_workers: 2 # Max concurrent systems (default: number of systems)
+
+env:
+ mode: "aws" # aws, gcp, azure, or local
+ region: "eu-west-1"
+ allow_external_database_access: true # Allow external access to database ports
+ # Instance configuration per system
+ instances:
+ exasol:
+ instance_type: "c6a.8xlarge" # 32 vCPUs, 64GB RAM, $1.224 per hour (+ EBS)
+ disk:
+        type: "ebs" # EBS volume (this instance type has no local NVMe)
+ size_gb: 150
+ label: "sr-bench-exa"
+ clickhouse:
+ instance_type: "c6a.8xlarge" # 32 vCPUs, 64GB RAM, $1.224 per hour (+ EBS)
+ disk:
+ type: "ebs"
+ size_gb: 150
+ label: "sr-bench-ch"
+ os_image: "ubuntu-22.04"
+ ssh_key_name: "sr-testkey" # AWS key pair name (without .pem extension)
+ ssh_private_key_path: "~/.ssh/sr-testkey.pem" # Local private key file path
+
+systems:
+ # Baseline: Exasol with original queries (first system is baseline)
+ - name: "exasol"
+ kind: "exasol"
+ version: "2025.1.8" # Latest Exasol version
+ setup:
+ method: "installer" # Native c4 installation
+ use_additional_disk: true # Auto-detect and use additional NVMe disk
+ c4_version: "4.28.5" # Latest c4 version
+ host_addrs: "$EXASOL_PRIVATE_IP" # Will be resolved from infrastructure
+ host_external_addrs: "$EXASOL_PUBLIC_IP"
+ image_password: "exasol123"
+ db_password: "exasol456"
+ license_file: "configs/exasol.license" # License file path
+ ## current code NEEDS this schema name here
+ schema: "clickbench"
+ db_mem_size: 51200 # RAM in MB
+ extra:
+ optimizer_mode: "analytical"
+ # Exasol database parameters for analytical workload optimization
+ db_params:
+ - "-useQueryCache=0"
+ - name: "clickhouse"
+ kind: "clickhouse"
+ version: "25.10.2.65" # Latest stable ClickHouse version
+ setup:
+ method: "native" # Native APT installation
+ use_additional_disk: true # Auto-detect and use additional NVMe disk
+ data_dir: "/data/clickhouse"
+ host: "$CLICKHOUSE_PUBLIC_IP" # Use public IP for external access
+ port: 8123 # HTTP port for clickhouse-connect
+ username: "default"
+ password: "clickhouse123" # Password for benchmark user
+ database: "clickbench"
+ extra:
+ memory_limit: "50g" # Memory limit for ClickHouse
+ max_threads: "32" # Match instance vCPUs
+      max_memory_usage: "50000000000" # ~50GB in bytes
+
+workload:
+ name: "clickbench"
+ runs_per_query: 5 # Number of measured runs
+ warmup_runs: 1 # Warmup runs before measurement
+ scale_factor: 1 # ClickBench does not have a scale factor, but the template check expects one
+ generator: "url-import" # fallback to "url-download" if url-import is not supported by system
+ format: "csv"
+
+report:
+ show_boxplots: true
+ show_latency_cdf: true
+ show_bar_chart: true
+ show_heatmap: true
diff --git a/configs/clickbench_exa_vs_ch_mu.yaml b/configs/clickbench_exa_vs_ch_mu.yaml
new file mode 100644
index 0000000..2413c75
--- /dev/null
+++ b/configs/clickbench_exa_vs_ch_mu.yaml
@@ -0,0 +1,87 @@
+title: "ClickBench comparison for Exasol vs ClickHouse, 10 users"
+author: "Stefan Reich, Principal Database Consultant at Exasol AG"
+
+# Execution configuration (optional)
+execution:
+ parallel: true # Enable parallel execution of systems (default: false)
+ max_workers: 2 # Max concurrent systems (default: number of systems)
+
+env:
+ mode: "aws" # aws, gcp, azure, or local
+ region: "eu-west-1"
+ allow_external_database_access: true # Allow external access to database ports
+ # Instance configuration per system
+ instances:
+ exasol:
+ instance_type: "c6a.8xlarge" # 32 vCPUs, 64GB RAM, $1.224 per hour (+ EBS)
+ disk:
+        type: "ebs" # EBS volume (this instance type has no local NVMe)
+ size_gb: 150
+ label: "sr-bench-exa"
+ clickhouse:
+ instance_type: "c6a.8xlarge" # 32 vCPUs, 64GB RAM, $1.224 per hour (+ EBS)
+ disk:
+ type: "ebs"
+ size_gb: 150
+ label: "sr-bench-ch"
+ os_image: "ubuntu-22.04"
+ ssh_key_name: "sr-testkey" # AWS key pair name (without .pem extension)
+ ssh_private_key_path: "~/.ssh/sr-testkey.pem" # Local private key file path
+
+systems:
+ # Baseline: Exasol with original queries (first system is baseline)
+ - name: "exasol"
+ kind: "exasol"
+ version: "2025.1.8" # Latest Exasol version
+ setup:
+ method: "installer" # Native c4 installation
+ use_additional_disk: true # Auto-detect and use additional NVMe disk
+ c4_version: "4.28.5" # Latest c4 version
+ host_addrs: "$EXASOL_PRIVATE_IP" # Will be resolved from infrastructure
+ host_external_addrs: "$EXASOL_PUBLIC_IP"
+ image_password: "exasol123"
+ db_password: "exasol456"
+ license_file: "configs/exasol.license" # License file path
+ ## current code NEEDS this schema name here
+ schema: "clickbench"
+ db_mem_size: 51200 # RAM in MB
+ extra:
+ optimizer_mode: "analytical"
+ # Exasol database parameters for analytical workload optimization
+ db_params:
+ - "-useQueryCache=0"
+ - name: "clickhouse"
+ kind: "clickhouse"
+ version: "25.10.2.65" # Latest stable ClickHouse version
+ setup:
+ method: "native" # Native APT installation
+ use_additional_disk: true # Auto-detect and use additional NVMe disk
+ data_dir: "/data/clickhouse"
+ host: "$CLICKHOUSE_PUBLIC_IP" # Use public IP for external access
+ port: 8123 # HTTP port for clickhouse-connect
+ username: "default"
+ password: "clickhouse123" # Password for benchmark user
+ database: "clickbench"
+ extra:
+ memory_limit: "50g" # Memory limit for ClickHouse
+ max_threads: "32" # Match instance vCPUs
+      max_memory_usage: "50000000000" # ~50GB in bytes
+
+workload:
+ name: "clickbench"
+ runs_per_query: 5 # Number of measured runs
+ warmup_runs: 1 # Warmup runs before measurement
+ scale_factor: 1 # ClickBench does not have a scale factor, but the template check expects one
+ generator: "url-import" # fallback to "url-download" if url-import is not supported by system
+ format: "csv"
+ multiuser:
+ enabled: true
+ num_streams: 10
+ randomize: true
+ random_seed: 64738
+
+report:
+ show_boxplots: true
+ show_latency_cdf: true
+ show_bar_chart: true
+ show_heatmap: true
diff --git a/configs/test_exasol_estuary_10g.yaml b/configs/wip_exasol_estuary_10g.yaml
similarity index 98%
rename from configs/test_exasol_estuary_10g.yaml
rename to configs/wip_exasol_estuary_10g.yaml
index 217423e..1a15b61 100644
--- a/configs/test_exasol_estuary_10g.yaml
+++ b/configs/wip_exasol_estuary_10g.yaml
@@ -1,5 +1,6 @@
title: "Exasol Standalone for estuary warehouse benchmark"
author: "Stefan Reich, Principal Database Consultant at Exasol AG"
+note: "WORK IN PROGRESS -- not stable yet"
# Execution configuration (optional)
execution:
diff --git a/pyproject.toml b/pyproject.toml
index a9bd912..6df63ef 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -48,6 +48,8 @@ dev = [
"types-Markdown>=3.0.0",
"pandas-stubs>=2.0.0",
"boto3-stubs[sts,ec2]>=1.26.0",
+ "types-requests",
+ "types-PyYAML",
# Database drivers for testing framework (not needed in generated packages)
"pyexasol>=0.25.0",
"clickhouse-connect>=0.6.0",
diff --git a/templates/package/cli.py.j2 b/templates/package/cli.py.j2
index f27a769..c5384fc 100644
--- a/templates/package/cli.py.j2
+++ b/templates/package/cli.py.j2
@@ -16,6 +16,7 @@ console = Console()
def load_workload_config(config_path: str) -> dict[str, Any]:
"""Load workload config without full validation (minimal fields only)."""
+ config: dict
with open(config_path, "r") as f:
config = yaml.safe_load(f)
return config
diff --git a/templates/package/workload_runner.py.j2 b/templates/package/workload_runner.py.j2
index c958ac0..43d78cc 100644
--- a/templates/package/workload_runner.py.j2
+++ b/templates/package/workload_runner.py.j2
@@ -63,14 +63,11 @@ def execute_queries(config: dict[str, Any], output_dir: Path, force: bool = Fals
random_seed = multiuser_config.get("random_seed", None)
console.print(f"[dim]Multiuser mode: {num_streams} streams, randomize={randomize}, seed={random_seed}[/dim]")
- # Get query names from workload object (which handles include/exclude logic)
- query_names = workload.queries_to_include if hasattr(workload, 'queries_to_include') else []
-
# Execute queries only (data should already be loaded)
console.print(f"[dim]Running queries...[/dim]")
try:
result_dict = workload.run_workload(
- system, query_names, runs_per_query, warmup_runs,
+ system, workload.get_included_queries(), runs_per_query, warmup_runs,
num_streams, randomize, random_seed
)
# Handle dict return format (measured and warmup results)
diff --git a/tests/test_downloading.py b/tests/test_downloading.py
new file mode 100644
index 0000000..5b8ff4d
--- /dev/null
+++ b/tests/test_downloading.py
@@ -0,0 +1,65 @@
+from pathlib import Path
+
+# noinspection PyUnusedImports
+import pytest
+from requests.exceptions import ConnectionError, HTTPError, MissingSchema
+
+from benchkit.common import download_file_to_storage
+
+
+def get_temp_file_name() -> Path:
+ import tempfile
+
+ with tempfile.NamedTemporaryFile() as f:
+ return Path(f.name)
+
+
+def test_bad_url():
+ with pytest.raises(MissingSchema):
+ download_file_to_storage("hello world", Path("data.csv"))
+
+
+def test_no_server():
+ with pytest.raises(ConnectionError):
+ download_file_to_storage(
+ "https://localhost:123/data.csv.gz", Path("data.csv.gz")
+ )
+
+
+def test_no_file():
+ with pytest.raises(HTTPError) as e:
+ download_file_to_storage(
+ "https://exasol.com/data_no_such_file.tgz", Path("none")
+ )
+ assert "404" in str(e.value)
+
+
+def test_no_access():
+ with pytest.raises(HTTPError) as e:
+ download_file_to_storage(
+ "https://x-up.s3.amazonaws.com/releases/c4/linux/x86_64/no_such_version/c4",
+ Path("xxx"),
+ )
+ assert "403" in str(e.value)
+
+
+def test_good_file():
+ from hashlib import file_digest
+
+ target: Path = get_temp_file_name()
+ assert not target.exists()
+ try:
+ download_file_to_storage(
+ "https://github.githubassets.com/favicons/favicon.png", target
+ )
+ assert target.exists()
+ assert target.stat().st_size == 958
+ with open(target, "rb") as file:
+ x = file_digest(file, "sha256")
+ assert (
+ x.hexdigest()
+ == "74cf90ac2fe6624ab1056cacea11cf7ed4f8bef54bbb0e869638013bba45bc08"
+ )
+
+ finally:
+ target.unlink(missing_ok=True)
diff --git a/tests/test_workload_interface.py b/tests/test_workload_interface.py
index 466d072..5e49518 100644
--- a/tests/test_workload_interface.py
+++ b/tests/test_workload_interface.py
@@ -89,16 +89,16 @@ def test_timeout_calculation(
workload: TPCH = TPCH({"name": "tpch", "scale_factor": scale_factor})
system.node_count = node_count
- assert workload._calculate_statement_timeout(
+ assert workload.calculate_statement_timeout(
"OPTIMIZE TABLE ORDERS", system
) == timedelta(seconds=orders_timeout_seconds)
- assert workload._calculate_statement_timeout(
+ assert workload.calculate_statement_timeout(
"SELECT * FROM ORDERS", system
) == timedelta(minutes=5), "should be default timeout"
assert (
timedelta(minutes=5)
- <= workload._calculate_statement_timeout("MATERIALIZE STATISTICS", system)
+ <= workload.calculate_statement_timeout("MATERIALIZE STATISTICS", system)
<= timedelta(hours=1)
), "timeout should be within bounds"
diff --git a/workloads/clickbench/queries/Q0.sql b/workloads/clickbench/queries/Q0.sql
new file mode 100644
index 0000000..d0bc616
--- /dev/null
+++ b/workloads/clickbench/queries/Q0.sql
@@ -0,0 +1,2 @@
+SELECT COUNT(*) FROM hits;
+
diff --git a/workloads/clickbench/queries/Q1.sql b/workloads/clickbench/queries/Q1.sql
new file mode 100644
index 0000000..a4f6026
--- /dev/null
+++ b/workloads/clickbench/queries/Q1.sql
@@ -0,0 +1,2 @@
+SELECT COUNT(*) FROM hits WHERE AdvEngineID <> 0;
+
diff --git a/workloads/clickbench/queries/Q10.sql b/workloads/clickbench/queries/Q10.sql
new file mode 100644
index 0000000..9f04931
--- /dev/null
+++ b/workloads/clickbench/queries/Q10.sql
@@ -0,0 +1,10 @@
+SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u
+FROM hits
+{% if system_kind == 'exasol' %}
+WHERE MobilePhoneModel IS NOT NULL
+{% else %}
+WHERE MobilePhoneModel <> ''
+{% endif %}
+GROUP BY MobilePhoneModel
+ORDER BY u DESC LIMIT 10;
+
diff --git a/workloads/clickbench/queries/Q11.sql b/workloads/clickbench/queries/Q11.sql
new file mode 100644
index 0000000..0500953
--- /dev/null
+++ b/workloads/clickbench/queries/Q11.sql
@@ -0,0 +1,9 @@
+SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits
+{% if system_kind == 'exasol' %}
+WHERE MobilePhoneModel IS NOT NULL
+{% else %}
+WHERE MobilePhoneModel <> ''
+{% endif %}
+GROUP BY MobilePhone, MobilePhoneModel
+ORDER BY u DESC LIMIT 10;
+
diff --git a/workloads/clickbench/queries/Q12.sql b/workloads/clickbench/queries/Q12.sql
new file mode 100644
index 0000000..90ae056
--- /dev/null
+++ b/workloads/clickbench/queries/Q12.sql
@@ -0,0 +1,10 @@
+SELECT SearchPhrase, COUNT(*) AS c
+FROM hits
+{% if system_kind == 'exasol' %}
+WHERE SearchPhrase IS NOT NULL
+{% else %}
+WHERE SearchPhrase <> ''
+{% endif %}
+GROUP BY SearchPhrase
+ORDER BY c DESC LIMIT 10;
+
diff --git a/workloads/clickbench/queries/Q13.sql b/workloads/clickbench/queries/Q13.sql
new file mode 100644
index 0000000..6ae1a5d
--- /dev/null
+++ b/workloads/clickbench/queries/Q13.sql
@@ -0,0 +1,10 @@
+SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u
+FROM hits
+{% if system_kind == 'exasol' %}
+WHERE SearchPhrase IS NOT NULL
+{% else %}
+WHERE SearchPhrase <> ''
+{% endif %}
+GROUP BY SearchPhrase
+ORDER BY u DESC LIMIT 10;
+
diff --git a/workloads/clickbench/queries/Q14.sql b/workloads/clickbench/queries/Q14.sql
new file mode 100644
index 0000000..77634c5
--- /dev/null
+++ b/workloads/clickbench/queries/Q14.sql
@@ -0,0 +1,10 @@
+SELECT SearchEngineID, SearchPhrase, COUNT(*) AS c
+FROM hits
+{% if system_kind == 'exasol' %}
+WHERE SearchPhrase IS NOT NULL
+{% else %}
+WHERE SearchPhrase <> ''
+{% endif %}
+GROUP BY SearchEngineID, SearchPhrase
+ORDER BY c DESC LIMIT 10;
+
diff --git a/workloads/clickbench/queries/Q15.sql b/workloads/clickbench/queries/Q15.sql
new file mode 100644
index 0000000..136570c
--- /dev/null
+++ b/workloads/clickbench/queries/Q15.sql
@@ -0,0 +1,5 @@
+SELECT UserID, COUNT(*)
+FROM hits
+GROUP BY UserID
+ORDER BY COUNT(*) DESC LIMIT 10;
+
diff --git a/workloads/clickbench/queries/Q16.sql b/workloads/clickbench/queries/Q16.sql
new file mode 100644
index 0000000..d2cacff
--- /dev/null
+++ b/workloads/clickbench/queries/Q16.sql
@@ -0,0 +1,5 @@
+SELECT UserID, SearchPhrase, COUNT(*)
+FROM hits
+GROUP BY UserID, SearchPhrase
+ORDER BY COUNT(*) DESC LIMIT 10;
+
diff --git a/workloads/clickbench/queries/Q17.sql b/workloads/clickbench/queries/Q17.sql
new file mode 100644
index 0000000..8bb3e68
--- /dev/null
+++ b/workloads/clickbench/queries/Q17.sql
@@ -0,0 +1,4 @@
+SELECT UserID, SearchPhrase, COUNT(*)
+FROM hits
+GROUP BY UserID, SearchPhrase LIMIT 10;
+
diff --git a/workloads/clickbench/queries/Q18.sql b/workloads/clickbench/queries/Q18.sql
new file mode 100644
index 0000000..0b7718a
--- /dev/null
+++ b/workloads/clickbench/queries/Q18.sql
@@ -0,0 +1,5 @@
+SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, COUNT(*)
+FROM hits
+GROUP BY UserID, extract(minute FROM EventTime), SearchPhrase
+ORDER BY COUNT(*) DESC LIMIT 10;
+
diff --git a/workloads/clickbench/queries/Q19.sql b/workloads/clickbench/queries/Q19.sql
new file mode 100644
index 0000000..0d01c7a
--- /dev/null
+++ b/workloads/clickbench/queries/Q19.sql
@@ -0,0 +1,2 @@
+SELECT UserID FROM hits WHERE UserID = 435090932899640449;
+
diff --git a/workloads/clickbench/queries/Q2.sql b/workloads/clickbench/queries/Q2.sql
new file mode 100644
index 0000000..22bad77
--- /dev/null
+++ b/workloads/clickbench/queries/Q2.sql
@@ -0,0 +1,2 @@
+SELECT SUM(AdvEngineID), COUNT(*), AVG(ResolutionWidth) FROM hits;
+
diff --git a/workloads/clickbench/queries/Q20.sql b/workloads/clickbench/queries/Q20.sql
new file mode 100644
index 0000000..7cc5b51
--- /dev/null
+++ b/workloads/clickbench/queries/Q20.sql
@@ -0,0 +1,2 @@
+SELECT COUNT(*) FROM hits WHERE URL LIKE '%google%';
+
diff --git a/workloads/clickbench/queries/Q21.sql b/workloads/clickbench/queries/Q21.sql
new file mode 100644
index 0000000..00807f9
--- /dev/null
+++ b/workloads/clickbench/queries/Q21.sql
@@ -0,0 +1,11 @@
+SELECT SearchPhrase, MIN(URL), COUNT(*) AS c
+FROM hits
+WHERE URL LIKE '%google%'
+{% if system_kind == 'exasol' %}
+AND SearchPhrase IS NOT NULL
+{% else %}
+AND SearchPhrase <> ''
+{% endif %}
+GROUP BY SearchPhrase
+ORDER BY c DESC LIMIT 10;
+
diff --git a/workloads/clickbench/queries/Q22.sql b/workloads/clickbench/queries/Q22.sql
new file mode 100644
index 0000000..2b2f167
--- /dev/null
+++ b/workloads/clickbench/queries/Q22.sql
@@ -0,0 +1,11 @@
+SELECT SearchPhrase, MIN(URL), MIN(Title), COUNT(*) AS c, COUNT(DISTINCT UserID)
+FROM hits
+WHERE Title LIKE '%Google%' AND URL NOT LIKE '%.google.%'
+{% if system_kind == 'exasol' %}
+AND SearchPhrase IS NOT NULL
+{% else %}
+AND SearchPhrase <> ''
+{% endif %}
+GROUP BY SearchPhrase
+ORDER BY c DESC LIMIT 10;
+
diff --git a/workloads/clickbench/queries/Q23.sql b/workloads/clickbench/queries/Q23.sql
new file mode 100644
index 0000000..599b096
--- /dev/null
+++ b/workloads/clickbench/queries/Q23.sql
@@ -0,0 +1,2 @@
+SELECT * FROM hits WHERE URL LIKE '%google%' ORDER BY EventTime LIMIT 10;
+
diff --git a/workloads/clickbench/queries/Q24.sql b/workloads/clickbench/queries/Q24.sql
new file mode 100644
index 0000000..a9d8955
--- /dev/null
+++ b/workloads/clickbench/queries/Q24.sql
@@ -0,0 +1,8 @@
+SELECT SearchPhrase FROM hits
+{% if system_kind == 'exasol' %}
+WHERE SearchPhrase IS NOT NULL
+{% else %}
+WHERE SearchPhrase <> ''
+{% endif %}
+ORDER BY EventTime LIMIT 10;
+
diff --git a/workloads/clickbench/queries/Q25.sql b/workloads/clickbench/queries/Q25.sql
new file mode 100644
index 0000000..4581dd9
--- /dev/null
+++ b/workloads/clickbench/queries/Q25.sql
@@ -0,0 +1,8 @@
+SELECT SearchPhrase FROM hits
+{% if system_kind == 'exasol' %}
+WHERE SearchPhrase IS NOT NULL
+{% else %}
+WHERE SearchPhrase <> ''
+{% endif %}
+ORDER BY SearchPhrase LIMIT 10;
+
diff --git a/workloads/clickbench/queries/Q26.sql b/workloads/clickbench/queries/Q26.sql
new file mode 100644
index 0000000..212138e
--- /dev/null
+++ b/workloads/clickbench/queries/Q26.sql
@@ -0,0 +1,8 @@
+SELECT SearchPhrase FROM hits
+{% if system_kind == 'exasol' %}
+WHERE SearchPhrase IS NOT NULL
+{% else %}
+WHERE SearchPhrase <> ''
+{% endif %}
+ORDER BY EventTime, SearchPhrase LIMIT 10;
+
diff --git a/workloads/clickbench/queries/Q27.sql b/workloads/clickbench/queries/Q27.sql
new file mode 100644
index 0000000..5218fac
--- /dev/null
+++ b/workloads/clickbench/queries/Q27.sql
@@ -0,0 +1,11 @@
+SELECT CounterID, AVG(length(URL)) AS l, COUNT(*) AS c
+FROM hits
+{% if system_kind == 'exasol' %}
+WHERE URL IS NOT NULL
+{% else %}
+WHERE URL <> ''
+{% endif %}
+GROUP BY CounterID
+HAVING COUNT(*) > 100000
+ORDER BY l DESC LIMIT 25;
+
diff --git a/workloads/clickbench/queries/Q28.sql b/workloads/clickbench/queries/Q28.sql
new file mode 100644
index 0000000..50b4d9c
--- /dev/null
+++ b/workloads/clickbench/queries/Q28.sql
@@ -0,0 +1,11 @@
+SELECT REGEXP_REPLACE(Referer, '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS k, AVG(length(Referer)) AS l, COUNT(*) AS c, MIN(Referer)
+FROM hits
+{% if system_kind == 'exasol' %}
+WHERE Referer IS NOT NULL
+{% else %}
+WHERE Referer <> ''
+{% endif %}
+GROUP BY REGEXP_REPLACE(Referer, '^https?://(?:www\.)?([^/]+)/.*$', '\1')
+HAVING COUNT(*) > 100000
+ORDER BY l DESC LIMIT 25;
+
diff --git a/workloads/clickbench/queries/Q29.sql b/workloads/clickbench/queries/Q29.sql
new file mode 100644
index 0000000..62f4f3c
--- /dev/null
+++ b/workloads/clickbench/queries/Q29.sql
@@ -0,0 +1,3 @@
+SELECT SUM(ResolutionWidth), SUM(ResolutionWidth + 1), SUM(ResolutionWidth + 2), SUM(ResolutionWidth + 3), SUM(ResolutionWidth + 4), SUM(ResolutionWidth + 5), SUM(ResolutionWidth + 6), SUM(ResolutionWidth + 7), SUM(ResolutionWidth + 8), SUM(ResolutionWidth + 9), SUM(ResolutionWidth + 10), SUM(ResolutionWidth + 11), SUM(ResolutionWidth + 12), SUM(ResolutionWidth + 13), SUM(ResolutionWidth + 14), SUM(ResolutionWidth + 15), SUM(ResolutionWidth + 16), SUM(ResolutionWidth + 17), SUM(ResolutionWidth + 18), SUM(ResolutionWidth + 19), SUM(ResolutionWidth + 20), SUM(ResolutionWidth + 21), SUM(ResolutionWidth + 22), SUM(ResolutionWidth + 23), SUM(ResolutionWidth + 24), SUM(ResolutionWidth + 25), SUM(ResolutionWidth + 26), SUM(ResolutionWidth + 27), SUM(ResolutionWidth + 28), SUM(ResolutionWidth + 29), SUM(ResolutionWidth + 30), SUM(ResolutionWidth + 31), SUM(ResolutionWidth + 32), SUM(ResolutionWidth + 33), SUM(ResolutionWidth + 34), SUM(ResolutionWidth + 35), SUM(ResolutionWidth + 36), SUM(ResolutionWidth + 37), SUM(ResolutionWidth + 38), SUM(ResolutionWidth + 39), SUM(ResolutionWidth + 40), SUM(ResolutionWidth + 41), SUM(ResolutionWidth + 42), SUM(ResolutionWidth + 43), SUM(ResolutionWidth + 44), SUM(ResolutionWidth + 45), SUM(ResolutionWidth + 46), SUM(ResolutionWidth + 47), SUM(ResolutionWidth + 48), SUM(ResolutionWidth + 49), SUM(ResolutionWidth + 50), SUM(ResolutionWidth + 51), SUM(ResolutionWidth + 52), SUM(ResolutionWidth + 53), SUM(ResolutionWidth + 54), SUM(ResolutionWidth + 55), SUM(ResolutionWidth + 56), SUM(ResolutionWidth + 57), SUM(ResolutionWidth + 58), SUM(ResolutionWidth + 59), SUM(ResolutionWidth + 60), SUM(ResolutionWidth + 61), SUM(ResolutionWidth + 62), SUM(ResolutionWidth + 63), SUM(ResolutionWidth + 64), SUM(ResolutionWidth + 65), SUM(ResolutionWidth + 66), SUM(ResolutionWidth + 67), SUM(ResolutionWidth + 68), SUM(ResolutionWidth + 69), SUM(ResolutionWidth + 70), SUM(ResolutionWidth + 71), SUM(ResolutionWidth + 72), SUM(ResolutionWidth + 73), SUM(ResolutionWidth + 74), SUM(ResolutionWidth + 75), SUM(ResolutionWidth + 76), SUM(ResolutionWidth + 77), SUM(ResolutionWidth + 78), SUM(ResolutionWidth + 79), SUM(ResolutionWidth + 80), SUM(ResolutionWidth + 81), SUM(ResolutionWidth + 82), SUM(ResolutionWidth + 83), SUM(ResolutionWidth + 84), SUM(ResolutionWidth + 85), SUM(ResolutionWidth + 86), SUM(ResolutionWidth + 87), SUM(ResolutionWidth + 88), SUM(ResolutionWidth + 89)
+FROM hits;
+
diff --git a/workloads/clickbench/queries/Q3.sql b/workloads/clickbench/queries/Q3.sql
new file mode 100644
index 0000000..deef647
--- /dev/null
+++ b/workloads/clickbench/queries/Q3.sql
@@ -0,0 +1,2 @@
+SELECT AVG(UserID) FROM hits;
+
diff --git a/workloads/clickbench/queries/Q30.sql b/workloads/clickbench/queries/Q30.sql
new file mode 100644
index 0000000..31fe84b
--- /dev/null
+++ b/workloads/clickbench/queries/Q30.sql
@@ -0,0 +1,10 @@
+SELECT SearchEngineID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth)
+FROM hits
+{% if system_kind == 'exasol' %}
+WHERE SearchPhrase IS NOT NULL
+{% else %}
+WHERE SearchPhrase <> ''
+{% endif %}
+GROUP BY SearchEngineID, ClientIP
+ORDER BY c DESC LIMIT 10;
+
diff --git a/workloads/clickbench/queries/Q31.sql b/workloads/clickbench/queries/Q31.sql
new file mode 100644
index 0000000..00bbebd
--- /dev/null
+++ b/workloads/clickbench/queries/Q31.sql
@@ -0,0 +1,10 @@
+SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth)
+FROM hits
+{% if system_kind == 'exasol' %}
+WHERE SearchPhrase IS NOT NULL
+{% else %}
+WHERE SearchPhrase <> ''
+{% endif %}
+GROUP BY WatchID, ClientIP
+ORDER BY c DESC LIMIT 10;
+
diff --git a/workloads/clickbench/queries/Q32.sql b/workloads/clickbench/queries/Q32.sql
new file mode 100644
index 0000000..68e399d
--- /dev/null
+++ b/workloads/clickbench/queries/Q32.sql
@@ -0,0 +1,2 @@
+SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
+
diff --git a/workloads/clickbench/queries/Q33.sql b/workloads/clickbench/queries/Q33.sql
new file mode 100644
index 0000000..ba10b7a
--- /dev/null
+++ b/workloads/clickbench/queries/Q33.sql
@@ -0,0 +1,2 @@
+SELECT URL, COUNT(*) AS c FROM hits GROUP BY URL ORDER BY c DESC LIMIT 10;
+
diff --git a/workloads/clickbench/queries/Q34.sql b/workloads/clickbench/queries/Q34.sql
new file mode 100644
index 0000000..e36a81f
--- /dev/null
+++ b/workloads/clickbench/queries/Q34.sql
@@ -0,0 +1,2 @@
+SELECT 1, URL, COUNT(*) AS c FROM hits GROUP BY 1, URL ORDER BY c DESC LIMIT 10;
+
diff --git a/workloads/clickbench/queries/Q35.sql b/workloads/clickbench/queries/Q35.sql
new file mode 100644
index 0000000..c37e5dd
--- /dev/null
+++ b/workloads/clickbench/queries/Q35.sql
@@ -0,0 +1,5 @@
+SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, COUNT(*) AS c
+FROM hits
+GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3
+ORDER BY c DESC LIMIT 10;
+
diff --git a/workloads/clickbench/queries/Q36.sql b/workloads/clickbench/queries/Q36.sql
new file mode 100644
index 0000000..e5686ac
--- /dev/null
+++ b/workloads/clickbench/queries/Q36.sql
@@ -0,0 +1,11 @@
+SELECT URL, COUNT(*) AS PageViews
+FROM hits
+WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0
+{% if system_kind == 'exasol' %}
+AND URL IS NOT NULL
+{% else %}
+AND URL <> ''
+{% endif %}
+GROUP BY URL
+ORDER BY PageViews DESC LIMIT 10;
+
diff --git a/workloads/clickbench/queries/Q37.sql b/workloads/clickbench/queries/Q37.sql
new file mode 100644
index 0000000..4529a6f
--- /dev/null
+++ b/workloads/clickbench/queries/Q37.sql
@@ -0,0 +1,11 @@
+SELECT Title, COUNT(*) AS PageViews
+FROM hits
+WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0
+{% if system_kind == 'exasol' %}
+AND Title IS NOT NULL
+{% else %}
+AND Title <> ''
+{% endif %}
+GROUP BY Title
+ORDER BY PageViews DESC LIMIT 10;
+
diff --git a/workloads/clickbench/queries/Q38.sql b/workloads/clickbench/queries/Q38.sql
new file mode 100644
index 0000000..2111856
--- /dev/null
+++ b/workloads/clickbench/queries/Q38.sql
@@ -0,0 +1,6 @@
+SELECT URL, COUNT(*) AS PageViews
+FROM hits
+WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND IsLink <> 0 AND IsDownload = 0
+GROUP BY URL
+ORDER BY PageViews DESC LIMIT 10 OFFSET 1000;
+
diff --git a/workloads/clickbench/queries/Q39.sql b/workloads/clickbench/queries/Q39.sql
new file mode 100644
index 0000000..47112c9
--- /dev/null
+++ b/workloads/clickbench/queries/Q39.sql
@@ -0,0 +1,6 @@
+SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, COUNT(*) AS PageViews
+FROM hits
+WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0
+GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END, URL
+ORDER BY PageViews DESC LIMIT 10 OFFSET 1000;
+
diff --git a/workloads/clickbench/queries/Q4.sql b/workloads/clickbench/queries/Q4.sql
new file mode 100644
index 0000000..fa54f20
--- /dev/null
+++ b/workloads/clickbench/queries/Q4.sql
@@ -0,0 +1,2 @@
+SELECT COUNT(DISTINCT UserID) FROM hits;
+
diff --git a/workloads/clickbench/queries/Q40.sql b/workloads/clickbench/queries/Q40.sql
new file mode 100644
index 0000000..66b41db
--- /dev/null
+++ b/workloads/clickbench/queries/Q40.sql
@@ -0,0 +1,6 @@
+SELECT URLHash, EventDate, COUNT(*) AS PageViews
+FROM hits
+WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 3594120000172545465
+GROUP BY URLHash, EventDate
+ORDER BY PageViews DESC LIMIT 10 OFFSET 100;
+
diff --git a/workloads/clickbench/queries/Q41.sql b/workloads/clickbench/queries/Q41.sql
new file mode 100644
index 0000000..54935b4
--- /dev/null
+++ b/workloads/clickbench/queries/Q41.sql
@@ -0,0 +1,6 @@
+SELECT WindowClientWidth, WindowClientHeight, COUNT(*) AS PageViews
+FROM hits
+WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND DontCountHits = 0 AND URLHash = 2868770270353813622
+GROUP BY WindowClientWidth, WindowClientHeight
+ORDER BY PageViews DESC LIMIT 10 OFFSET 10000;
+
diff --git a/workloads/clickbench/queries/Q42.sql b/workloads/clickbench/queries/Q42.sql
new file mode 100644
index 0000000..060741f
--- /dev/null
+++ b/workloads/clickbench/queries/Q42.sql
@@ -0,0 +1,6 @@
+SELECT DATE_TRUNC('minute', EventTime) AS M, COUNT(*) AS PageViews
+FROM hits
+WHERE CounterID = 62 AND EventDate >= '2013-07-14' AND EventDate <= '2013-07-15' AND IsRefresh = 0 AND DontCountHits = 0
+GROUP BY DATE_TRUNC('minute', EventTime)
+ORDER BY DATE_TRUNC('minute', EventTime) LIMIT 10 OFFSET 1000;
+
diff --git a/workloads/clickbench/queries/Q5.sql b/workloads/clickbench/queries/Q5.sql
new file mode 100644
index 0000000..7e8f7cd
--- /dev/null
+++ b/workloads/clickbench/queries/Q5.sql
@@ -0,0 +1,2 @@
+SELECT COUNT(DISTINCT SearchPhrase) FROM hits;
+
diff --git a/workloads/clickbench/queries/Q6.sql b/workloads/clickbench/queries/Q6.sql
new file mode 100644
index 0000000..18f4f88
--- /dev/null
+++ b/workloads/clickbench/queries/Q6.sql
@@ -0,0 +1,2 @@
+SELECT MIN(EventDate), MAX(EventDate) FROM hits;
+
diff --git a/workloads/clickbench/queries/Q7.sql b/workloads/clickbench/queries/Q7.sql
new file mode 100644
index 0000000..d1faebd
--- /dev/null
+++ b/workloads/clickbench/queries/Q7.sql
@@ -0,0 +1,6 @@
+SELECT AdvEngineID, COUNT(*)
+FROM hits
+WHERE AdvEngineID <> 0
+GROUP BY AdvEngineID
+ORDER BY COUNT(*) DESC;
+
diff --git a/workloads/clickbench/queries/Q8.sql b/workloads/clickbench/queries/Q8.sql
new file mode 100644
index 0000000..67de842
--- /dev/null
+++ b/workloads/clickbench/queries/Q8.sql
@@ -0,0 +1,5 @@
+SELECT RegionID, COUNT(DISTINCT UserID) AS u
+FROM hits
+GROUP BY RegionID
+ORDER BY u DESC LIMIT 10;
+
diff --git a/workloads/clickbench/queries/Q9.sql b/workloads/clickbench/queries/Q9.sql
new file mode 100644
index 0000000..2e88978
--- /dev/null
+++ b/workloads/clickbench/queries/Q9.sql
@@ -0,0 +1,5 @@
+SELECT RegionID, SUM(AdvEngineID), COUNT(*) AS c, AVG(ResolutionWidth), COUNT(DISTINCT UserID)
+FROM hits
+GROUP BY RegionID
+ORDER BY c DESC LIMIT 10;
+
diff --git a/workloads/clickbench/setup/create_tables.sql b/workloads/clickbench/setup/create_tables.sql
new file mode 100644
index 0000000..1568974
--- /dev/null
+++ b/workloads/clickbench/setup/create_tables.sql
@@ -0,0 +1,237 @@
+-- ClickBench Table Creation Script
+-- Creates the ClickBench "hits" table with appropriate data types and storage options for each database system
+
+{% if system_kind == 'exasol' %}
+-- Exasol table creation
+CREATE OR REPLACE TABLE {{ schema }}.hits
+(
+ WatchID BIGINT NOT NULL,
+ JavaEnable SMALLINT NOT NULL,
+ Title VARCHAR(2000000),
+ GoodEvent SMALLINT NOT NULL,
+ EventTime TIMESTAMP NOT NULL,
+ EventDate Date NOT NULL,
+ CounterID INTEGER NOT NULL,
+ ClientIP INTEGER NOT NULL,
+ RegionID INTEGER NOT NULL,
+ UserID BIGINT NOT NULL,
+ CounterClass SMALLINT NOT NULL,
+ OS SMALLINT NOT NULL,
+ UserAgent SMALLINT NOT NULL,
+ URL VARCHAR(2000000),
+ Referer VARCHAR(2000000),
+ IsRefresh SMALLINT NOT NULL,
+ RefererCategoryID SMALLINT NOT NULL,
+ RefererRegionID INTEGER NOT NULL,
+ URLCategoryID SMALLINT NOT NULL,
+ URLRegionID INTEGER NOT NULL,
+ ResolutionWidth SMALLINT NOT NULL,
+ ResolutionHeight SMALLINT NOT NULL,
+ ResolutionDepth SMALLINT NOT NULL,
+ FlashMajor SMALLINT NOT NULL,
+ FlashMinor SMALLINT NOT NULL,
+ FlashMinor2 VARCHAR(2000000),
+ NetMajor SMALLINT NOT NULL,
+ NetMinor SMALLINT NOT NULL,
+ UserAgentMajor SMALLINT NOT NULL,
+ UserAgentMinor VARCHAR(255),
+ CookieEnable SMALLINT NOT NULL,
+ JavascriptEnable SMALLINT NOT NULL,
+ IsMobile SMALLINT NOT NULL,
+ MobilePhone SMALLINT NOT NULL,
+ MobilePhoneModel VARCHAR(2000000),
+ Params VARCHAR(2000000),
+ IPNetworkID INTEGER NOT NULL,
+ TraficSourceID SMALLINT NOT NULL,
+ SearchEngineID SMALLINT NOT NULL,
+ SearchPhrase VARCHAR(2000000),
+ AdvEngineID SMALLINT NOT NULL,
+ IsArtifical SMALLINT NOT NULL,
+ WindowClientWidth SMALLINT NOT NULL,
+ WindowClientHeight SMALLINT NOT NULL,
+ ClientTimeZone SMALLINT NOT NULL,
+ ClientEventTime TIMESTAMP NOT NULL,
+ SilverlightVersion1 SMALLINT NOT NULL,
+ SilverlightVersion2 SMALLINT NOT NULL,
+ SilverlightVersion3 INTEGER NOT NULL,
+ SilverlightVersion4 SMALLINT NOT NULL,
+ PageCharset VARCHAR(2000000),
+ CodeVersion INTEGER NOT NULL,
+ IsLink SMALLINT NOT NULL,
+ IsDownload SMALLINT NOT NULL,
+ IsNotBounce SMALLINT NOT NULL,
+ FUniqID BIGINT NOT NULL,
+ OriginalURL VARCHAR(2000000),
+ HID INTEGER NOT NULL,
+ IsOldCounter SMALLINT NOT NULL,
+ IsEvent SMALLINT NOT NULL,
+ IsParameter SMALLINT NOT NULL,
+ DontCountHits SMALLINT NOT NULL,
+ WithHash SMALLINT NOT NULL,
+ HitColor CHAR NOT NULL,
+ LocalEventTime TIMESTAMP NOT NULL,
+ Age SMALLINT NOT NULL,
+ Sex SMALLINT NOT NULL,
+ Income SMALLINT NOT NULL,
+ Interests SMALLINT NOT NULL,
+ Robotness SMALLINT NOT NULL,
+ RemoteIP INTEGER NOT NULL,
+ WindowName INTEGER NOT NULL,
+ OpenerName INTEGER NOT NULL,
+ HistoryLength SMALLINT NOT NULL,
+ BrowserLanguage VARCHAR(2000000),
+ BrowserCountry VARCHAR(2000000),
+ SocialNetwork VARCHAR(2000000),
+ SocialAction VARCHAR(2000000),
+ HTTPError SMALLINT NOT NULL,
+ SendTiming INTEGER NOT NULL,
+ DNSTiming INTEGER NOT NULL,
+ ConnectTiming INTEGER NOT NULL,
+ ResponseStartTiming INTEGER NOT NULL,
+ ResponseEndTiming INTEGER NOT NULL,
+ FetchTiming INTEGER NOT NULL,
+ SocialSourceNetworkID SMALLINT NOT NULL,
+ SocialSourcePage VARCHAR(2000000),
+ ParamPrice BIGINT NOT NULL,
+ ParamOrderID VARCHAR(2000000),
+ ParamCurrency VARCHAR(2000000),
+ ParamCurrencyID SMALLINT NOT NULL,
+ OpenstatServiceName VARCHAR(2000000),
+ OpenstatCampaignID VARCHAR(2000000),
+ OpenstatAdID VARCHAR(2000000),
+ OpenstatSourceID VARCHAR(2000000),
+ UTMSource VARCHAR(2000000),
+ UTMMedium VARCHAR(2000000),
+ UTMCampaign VARCHAR(2000000),
+ UTMContent VARCHAR(2000000),
+ UTMTerm VARCHAR(2000000),
+ FromTag VARCHAR(2000000),
+ HasGCLID SMALLINT NOT NULL,
+ RefererHash BIGINT NOT NULL,
+ URLHash BIGINT NOT NULL,
+ CLID INTEGER NOT NULL,
+ PRIMARY KEY (CounterID, EventDate, UserID, EventTime, WatchID),
+ PARTITION BY EventDate
+);
+{% elif system_kind == 'clickhouse' %}
+-- ClickHouse table creation
+{% if node_count > 1 %}
+-- MULTINODE CLUSTER: local tables on all nodes + distributed tables are not yet supported
+{{ UNSUPPORTED_SYSTEM_KIND_ERROR_FOR[system_kind] }}
+{% else %}
+-- Single node setup
+CREATE TABLE hits
+(
+ WatchID BIGINT NOT NULL,
+ JavaEnable SMALLINT NOT NULL,
+ Title TEXT NOT NULL,
+ GoodEvent SMALLINT NOT NULL,
+ EventTime TIMESTAMP NOT NULL,
+ EventDate Date NOT NULL,
+ CounterID INTEGER NOT NULL,
+ ClientIP INTEGER NOT NULL,
+ RegionID INTEGER NOT NULL,
+ UserID BIGINT NOT NULL,
+ CounterClass SMALLINT NOT NULL,
+ OS SMALLINT NOT NULL,
+ UserAgent SMALLINT NOT NULL,
+ URL TEXT NOT NULL,
+ Referer TEXT NOT NULL,
+ IsRefresh SMALLINT NOT NULL,
+ RefererCategoryID SMALLINT NOT NULL,
+ RefererRegionID INTEGER NOT NULL,
+ URLCategoryID SMALLINT NOT NULL,
+ URLRegionID INTEGER NOT NULL,
+ ResolutionWidth SMALLINT NOT NULL,
+ ResolutionHeight SMALLINT NOT NULL,
+ ResolutionDepth SMALLINT NOT NULL,
+ FlashMajor SMALLINT NOT NULL,
+ FlashMinor SMALLINT NOT NULL,
+ FlashMinor2 TEXT NOT NULL,
+ NetMajor SMALLINT NOT NULL,
+ NetMinor SMALLINT NOT NULL,
+ UserAgentMajor SMALLINT NOT NULL,
+ UserAgentMinor VARCHAR(255) NOT NULL,
+ CookieEnable SMALLINT NOT NULL,
+ JavascriptEnable SMALLINT NOT NULL,
+ IsMobile SMALLINT NOT NULL,
+ MobilePhone SMALLINT NOT NULL,
+ MobilePhoneModel TEXT NOT NULL,
+ Params TEXT NOT NULL,
+ IPNetworkID INTEGER NOT NULL,
+ TraficSourceID SMALLINT NOT NULL,
+ SearchEngineID SMALLINT NOT NULL,
+ SearchPhrase TEXT NOT NULL,
+ AdvEngineID SMALLINT NOT NULL,
+ IsArtifical SMALLINT NOT NULL,
+ WindowClientWidth SMALLINT NOT NULL,
+ WindowClientHeight SMALLINT NOT NULL,
+ ClientTimeZone SMALLINT NOT NULL,
+ ClientEventTime TIMESTAMP NOT NULL,
+ SilverlightVersion1 SMALLINT NOT NULL,
+ SilverlightVersion2 SMALLINT NOT NULL,
+ SilverlightVersion3 INTEGER NOT NULL,
+ SilverlightVersion4 SMALLINT NOT NULL,
+ PageCharset TEXT NOT NULL,
+ CodeVersion INTEGER NOT NULL,
+ IsLink SMALLINT NOT NULL,
+ IsDownload SMALLINT NOT NULL,
+ IsNotBounce SMALLINT NOT NULL,
+ FUniqID BIGINT NOT NULL,
+ OriginalURL TEXT NOT NULL,
+ HID INTEGER NOT NULL,
+ IsOldCounter SMALLINT NOT NULL,
+ IsEvent SMALLINT NOT NULL,
+ IsParameter SMALLINT NOT NULL,
+ DontCountHits SMALLINT NOT NULL,
+ WithHash SMALLINT NOT NULL,
+ HitColor CHAR NOT NULL,
+ LocalEventTime TIMESTAMP NOT NULL,
+ Age SMALLINT NOT NULL,
+ Sex SMALLINT NOT NULL,
+ Income SMALLINT NOT NULL,
+ Interests SMALLINT NOT NULL,
+ Robotness SMALLINT NOT NULL,
+ RemoteIP INTEGER NOT NULL,
+ WindowName INTEGER NOT NULL,
+ OpenerName INTEGER NOT NULL,
+ HistoryLength SMALLINT NOT NULL,
+ BrowserLanguage TEXT NOT NULL,
+ BrowserCountry TEXT NOT NULL,
+ SocialNetwork TEXT NOT NULL,
+ SocialAction TEXT NOT NULL,
+ HTTPError SMALLINT NOT NULL,
+ SendTiming INTEGER NOT NULL,
+ DNSTiming INTEGER NOT NULL,
+ ConnectTiming INTEGER NOT NULL,
+ ResponseStartTiming INTEGER NOT NULL,
+ ResponseEndTiming INTEGER NOT NULL,
+ FetchTiming INTEGER NOT NULL,
+ SocialSourceNetworkID SMALLINT NOT NULL,
+ SocialSourcePage TEXT NOT NULL,
+ ParamPrice BIGINT NOT NULL,
+ ParamOrderID TEXT NOT NULL,
+ ParamCurrency TEXT NOT NULL,
+ ParamCurrencyID SMALLINT NOT NULL,
+ OpenstatServiceName TEXT NOT NULL,
+ OpenstatCampaignID TEXT NOT NULL,
+ OpenstatAdID TEXT NOT NULL,
+ OpenstatSourceID TEXT NOT NULL,
+ UTMSource TEXT NOT NULL,
+ UTMMedium TEXT NOT NULL,
+ UTMCampaign TEXT NOT NULL,
+ UTMContent TEXT NOT NULL,
+ UTMTerm TEXT NOT NULL,
+ FromTag TEXT NOT NULL,
+ HasGCLID SMALLINT NOT NULL,
+ RefererHash BIGINT NOT NULL,
+ URLHash BIGINT NOT NULL,
+ CLID INTEGER NOT NULL,
+ PRIMARY KEY (CounterID, EventDate, UserID, EventTime, WatchID)
+);
+{% endif %}
+{% else %}
+{{ UNSUPPORTED_SYSTEM_KIND_ERROR_FOR[system_kind] }}
+{% endif %}
+
+