redis · slice4e · Oct 16, 2025 · Oct 16, 2025 · Oct 16, 2025 · Oct 16, 2025
diff --git a/commands.json b/commands.json
diff --git a/groups.json b/groups.json
@@ -1,70 +1,74 @@
 {
   "bitmap": {
-    "display": "Bitmap",
-    "description": "Operations on the Bitmap data type"
+    "description": "Operations on the Bitmap data type",
+    "display": "Bitmap"
   },
   "cluster": {
-    "display": "Cluster",
-    "description": "Redis Cluster management"
+    "description": "Redis Cluster management",
+    "display": "Cluster"
   },
   "connection": {
-    "display": "Connection",
-    "description": "Client connections management"
+    "description": "Client connections management",
+    "display": "Connection"
   },
   "generic": {
-    "display": "Generic",
-    "description": "Generic commands"
+    "description": "Generic commands",
+    "display": "Generic"
   },
   "geo": {
-    "display": "Geospatial indices",
-    "description": "Operations on the Geospatial Index data type"
+    "description": "Operations on the Geospatial Index data type",
+    "display": "Geospatial indices"
   },
   "hash": {
-    "display": "Hash",
-    "description": "Operations on the Hash data type"
+    "description": "Operations on the Hash data type",
+    "display": "Hash"
   },
   "hyperloglog": {
-    "display": "HyperLogLog",
-    "description": "Operations on the HyperLogLog data type"
+    "description": "Operations on the HyperLogLog data type",
+    "display": "HyperLogLog"
   },
+  "json": [
+    "JSON.GET",
+    "JSON.SET"
+  ],
   "list": {
-    "display": "List",
-    "description": "Operations on the List data type"
+    "description": "Operations on the List data type",
+    "display": "List"
   },
   "pubsub": {
-    "display": "Pub/Sub",
-    "description": "Pub/Sub commands"
+    "description": "Pub/Sub commands",
+    "display": "Pub/Sub"
   },
   "scripting": {
-    "display": "Scripting and Functions",
-    "description": "Redis server-side scripting and functions"
+    "description": "Redis server-side scripting and functions",
+    "display": "Scripting and Functions"
   },
   "sentinel": {
-    "display": "Sentinel",
-    "description": "Redis Sentinel commands"
+    "description": "Redis Sentinel commands",
+    "display": "Sentinel"
   },
   "server": {
-    "display": "Server",
-    "description": "Server management commands"
+    "description": "Server management commands",
+    "display": "Server"
   },
   "set": {
-    "display": "Set",
-    "description": "Operations on the Set data type"
+    "description": "Operations on the Set data type",
+    "display": "Set"
   },
   "sorted-set": {
-    "display": "Sorted Set",
-    "description": "Operations on the Sorted Set data type"
+    "description": "Operations on the Sorted Set data type",
+    "display": "Sorted Set"
   },
   "stream": {
-    "display": "Stream",
-    "description": "Operations on the Stream data type"
+    "description": "Operations on the Stream data type",
+    "display": "Stream"
   },
   "string": {
-    "display": "String",
-    "description": "Operations on the String data type"
+    "description": "Operations on the String data type",
+    "display": "String"
   },
   "transactions": {
-    "display": "Transactions",
-    "description": "Redis Transaction management"
+    "description": "Redis Transaction management",
+    "display": "Transactions"
   }
-}
+}
diff --git a/redis_benchmarks_specification/__cli__/cli.py b/redis_benchmarks_specification/__cli__/cli.py
@@ -80,6 +80,22 @@ def trigger_tests_dockerhub_cli_command_logic(args, project_name, project_versio
         decode_responses=False,
     )
     conn.ping()
+
+    # Extract version from Docker image tag if possible
+    # e.g., "redis:7.4.0" -> "7.4.0"
+    # e.g., "valkey/valkey:7.2.6-bookworm" -> "7.2.6"
+    git_version = None
+    if ":" in args.run_image:
+        tag = args.run_image.split(":")[-1]
+        # Try to extract version number from tag
+        # Common patterns: "7.4.0", "7.2.6-bookworm"; a tag such as "latest" has no x.y.z prefix and leaves git_version as None
+        import re
+
+        version_match = re.match(r"^(\d+\.\d+\.\d+)", tag)
+        if version_match:
+            git_version = version_match.group(1)
+            logging.info(f"Extracted git_version '{git_version}' from image tag")
+
     testDetails = {}
     build_stream_fields, result = generate_benchmark_stream_request(
         args.id,
@@ -96,7 +112,7 @@ def trigger_tests_dockerhub_cli_command_logic(args, project_name, project_versio
         None,
         None,
         None,
-        None,
+        git_version,  # Pass extracted version
         None,
         None,
         None,

diff --git a/redis_benchmarks_specification/__runner__/runner.py b/redis_benchmarks_specification/__runner__/runner.py
@@ -175,13 +175,20 @@ def validate_benchmark_metrics(
     Args:
         results_dict: Dictionary containing benchmark results
         test_name: Name of the test being validated
-        benchmark_config: Benchmark configuration (unused, for compatibility)
+        benchmark_config: Benchmark configuration (optional, contains tested-commands)
         default_metrics: Default metrics configuration (unused, for compatibility)
 
     Returns:
         tuple: (is_valid, error_message)
     """
     try:
+        # Get tested commands from config if available
+        tested_commands = []
+        if benchmark_config and "tested-commands" in benchmark_config:
+            tested_commands = [
+                cmd.lower() for cmd in benchmark_config["tested-commands"]
+            ]
+
         # Define validation rules
         throughput_patterns = [
             "ops/sec",
@@ -219,6 +226,29 @@ def check_nested_dict(data, path=""):
                 ):
                     return
 
+                # Skip operation-specific metrics for operations not being tested
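+                # For example, skip Gets.Ops/sec if only SET commands are tested. NOTE(review): tested-commands appears to hold command names (e.g. "sadd"), so plural entries like "gets"/"sets" look like they can never match - confirm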
+                if tested_commands:
+                    skip_metric = False
+                    operation_types = [
+                        "gets",
+                        "sets",
+                        "hgets",
+                        "hsets",
+                        "lpush",
+                        "rpush",
+                        "sadd",
+                    ]
+                    for op_type in operation_types:
+                        if (
+                            op_type in metric_path_lower
+                            and op_type not in tested_commands
+                        ):
+                            skip_metric = True
+                            break
+                    if skip_metric:
+                        return
+
                 # Check throughput metrics
                 for pattern in throughput_patterns:
                     if pattern in metric_path_lower:
@@ -2672,13 +2702,20 @@ def delete_temporary_files(
                             if not success:
                                 logging.error(f"Memtier benchmark failed: {stderr}")
                                 # Clean up database after failure (timeout or error)
-                                if args.flushall_on_every_test_end or args.flushall_on_every_test_start:
-                                    logging.warning("Benchmark failed - cleaning up database with FLUSHALL")
+                                if (
+                                    args.flushall_on_every_test_end
+                                    or args.flushall_on_every_test_start
+                                ):
+                                    logging.warning(
+                                        "Benchmark failed - cleaning up database with FLUSHALL"
+                                    )
                                     try:
                                         for r in redis_conns:
                                             r.flushall()
                                     except Exception as e:
-                                        logging.error(f"FLUSHALL failed after benchmark failure: {e}")
+                                        logging.error(
+                                            f"FLUSHALL failed after benchmark failure: {e}"
+                                        )
                                 # Continue with the test but log the failure
                                 client_container_stdout = f"ERROR: {stderr}"
 
@@ -2848,6 +2885,14 @@ def delete_temporary_files(
                         )
                         results_dict = json.loads(client_container_stdout)
 
+                        # Write the aggregated results to a file so they are preserved
+                        full_result_path = local_benchmark_output_filename
+                        with open(full_result_path, "w") as json_file:
+                            json.dump(results_dict, json_file, indent=2)
+                        logging.info(
+                            f"Wrote aggregated multi-client results to {full_result_path}"
+                        )
+
                         # Validate benchmark metrics
                         is_valid, validation_error = validate_benchmark_metrics(
                             results_dict, test_name, benchmark_config, default_metrics
@@ -3025,8 +3070,13 @@ def delete_temporary_files(
                     test_result = False
 
                     # Clean up database after exception to prevent contamination of next test
-                    if args.flushall_on_every_test_end or args.flushall_on_every_test_start:
-                        logging.warning("Exception caught - cleaning up database with FLUSHALL")
+                    if (
+                        args.flushall_on_every_test_end
+                        or args.flushall_on_every_test_start
+                    ):
+                        logging.warning(
+                            "Exception caught - cleaning up database with FLUSHALL"
+                        )
                         try:
                             for r in redis_conns:
                                 r.flushall()

diff --git a/...pecification/test-suites/memtier_benchmark-1Mkeys-string-set-with-ex-100B-pipeline-10.yml b/...pecification/test-suites/memtier_benchmark-1Mkeys-string-set-with-ex-100B-pipeline-10.yml
@@ -12,7 +12,7 @@ dbconfig:
     tool: memtier_benchmark
     arguments: '"--data-size" "100" "--ratio" "1:0" "--key-pattern" "P:P" "-c" "50"
       "-t" "2" "--hide-histogram" "--key-minimum" "1"  "--key-maximum" "1000000" -n
-      allkeys" --pipeline 50'
+      "allkeys" --pipeline 50'
   resources:
     requests:
       memory: 1g

diff --git a/..._specification/test-suites/memtier_benchmark-playbook-rate-limiting-lua-100k-sessions.yml b/..._specification/test-suites/memtier_benchmark-playbook-rate-limiting-lua-100k-sessions.yml
@@ -34,7 +34,6 @@ tested-commands:
 - bitcount
 - eval
 tested-groups:
-- bitmap
 - scripting
 
 redis-topologies:

diff --git a/...tion/test-suites/memtier_benchmark-playbook-realtime-analytics-membership-pipeline-10.yml b/...tion/test-suites/memtier_benchmark-playbook-realtime-analytics-membership-pipeline-10.yml
@@ -35,6 +35,7 @@ dbconfig:
 tested-commands:
 - smembers
 - sdiff
+- sunion
 redis-topologies:
 - oss-standalone
 build-variants:

diff --git a/...ks_specification/test-suites/memtier_benchmark-playbook-realtime-analytics-membership.yml b/...ks_specification/test-suites/memtier_benchmark-playbook-realtime-analytics-membership.yml
@@ -35,6 +35,7 @@ dbconfig:
 tested-commands:
 - smembers
 - sdiff
+- sunion
 redis-topologies:
 - oss-standalone
 build-variants: