diff --git a/tests/python_client/check/func_check.py b/tests/python_client/check/func_check.py
index 08a3f2ee91173..4475f2f4948f9 100644
--- a/tests/python_client/check/func_check.py
+++ b/tests/python_client/check/func_check.py
@@ -251,8 +251,8 @@ def check_describe_collection_property(res, func_name, check_items):
             assert res["enable_dynamic_field"] == check_items.get("enable_dynamic_field", True)
         if check_items.get("num_partitions", 1):
             assert res["num_partitions"] == check_items.get("num_partitions", 1)
-        if check_items.get("id_name", "id"):
-            assert res["fields"][0]["name"] == check_items.get("id_name", "id")
+        if check_items.get("primary_field", None) is not None:
+            assert res["fields"][0]["name"] == check_items.get("primary_field")
         if check_items.get("vector_name", "vector"):
             assert res["fields"][1]["name"] == check_items.get("vector_name", "vector")
         if check_items.get("dim", None) is not None:
@@ -372,13 +372,14 @@ def check_search_results(search_res, func_name, check_items):
         log.info("search_results_check: Numbers of query searched is correct")
         enable_milvus_client_api = check_items.get("enable_milvus_client_api", False)
         # log.debug(search_res)
+        pk_name = check_items.get('primary_field', 'id')
         for hits in search_res:
             searched_original_vectors = []
             ids = []
             vector_id = 0
             if enable_milvus_client_api:
                 for hit in hits:
-                    ids.append(hit['id'])
+                    ids.append(hit[pk_name])
             else:
                 ids = list(hits.ids)
             if (len(hits) != check_items["limit"]) \
diff --git a/tests/python_client/common/common_type.py b/tests/python_client/common/common_type.py
index bae1c122b57c1..c2e5dc2d6d973 100644
--- a/tests/python_client/common/common_type.py
+++ b/tests/python_client/common/common_type.py
@@ -220,10 +220,12 @@
                    "SPARSE_INVERTED_INDEX", "SPARSE_WAND",
                    "GPU_IVF_FLAT", "GPU_IVF_PQ"]
 
+inverted_index_algo = ['TAAT_NAIVE', 'DAAT_WAND', 'DAAT_MAXSCORE']
+
 default_all_indexes_params = [{}, {"nlist": 128}, {"nlist": 128}, {"nlist": 128, "m": 16, "nbits": 8},
                               {"M": 32, "efConstruction": 360}, {"nlist": 128}, {}, {},
                               {"nlist": 64},
-                              {"drop_ratio_build": 0.2}, {"drop_ratio_build": 0.2},
+                              {}, {"drop_ratio_build": 0.2},
                               {"nlist": 64}, {"nlist": 64, "m": 16, "nbits": 8}]
 
 default_all_search_params_params = [{}, {"nprobe": 32}, {"nprobe": 32}, {"nprobe": 32},
diff --git a/tests/python_client/milvus_client/test_milvus_client_collection.py b/tests/python_client/milvus_client/test_milvus_client_collection.py
index 0c2ff3ff8cb96..d8b9382f6a13d 100644
--- a/tests/python_client/milvus_client/test_milvus_client_collection.py
+++ b/tests/python_client/milvus_client/test_milvus_client_collection.py
@@ -306,7 +306,7 @@ def test_milvus_client_collection_self_creation_default(self, nullable):
                        "consistency_level": 0,
                        "enable_dynamic_field": False,
                        "num_partitions": 16,
-                       "id_name": "id_string",
+                       "primary_field": "id_string",
                        "vector_name": "embeddings"}
         if nullable:
             check_items["nullable_fields"] = ["nullable_field", "array_field"]
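NOTE: the new `inverted_index_algo` list in common_type.py drives the sparse-index parametrization used throughout the test changes below. As a minimal sketch of where those values end up (assuming an existing collection with a SPARSE_FLOAT_VECTOR field named "sparse_vector"; these names are illustrative, not from this patch):

    # Python sketch, not part of the patch.
    from pymilvus import Collection

    def create_sparse_index(coll: Collection, algo: str) -> None:
        # algo is one of "TAAT_NAIVE", "DAAT_WAND", "DAAT_MAXSCORE"
        coll.create_index(
            "sparse_vector",
            {
                "index_type": "SPARSE_INVERTED_INDEX",
                "metric_type": "IP",
                "params": {"inverted_index_algo": algo},
            },
        )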
diff --git a/tests/python_client/milvus_client/test_milvus_client_search.py b/tests/python_client/milvus_client/test_milvus_client_search.py
index 5177845ea33d2..89d70aac23551 100644
--- a/tests/python_client/milvus_client/test_milvus_client_search.py
+++ b/tests/python_client/milvus_client/test_milvus_client_search.py
@@ -247,39 +247,54 @@ def test_milvus_client_rename_search_query_default(self):
         client = self._client()
         collection_name = cf.gen_unique_str(prefix)
         # 1. create collection
-        self.create_collection(client, collection_name, default_dim, consistency_level="Bounded")
+        schema = self.create_schema(client, enable_dynamic_field=True)[0]
+        pk_name = 'pk_varchar'
+        schema.add_field(pk_name, DataType.VARCHAR, max_length=64, is_primary=True,
+                         auto_id=False)
+        schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
+        schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64)
+        schema.add_field(default_float_field_name, DataType.FLOAT)
+        self.create_collection(client, collection_name, schema=schema, consistency_level="Bounded")
         collections = self.list_collections(client)[0]
         assert collection_name in collections
         self.describe_collection(client, collection_name,
                                  check_task=CheckTasks.check_describe_collection_property,
                                  check_items={"collection_name": collection_name,
-                                              "dim": default_dim,
+                                              "dim": default_dim, "primary_field": pk_name,
                                               "consistency_level": 0})
         old_name = collection_name
         new_name = collection_name + "new"
         self.rename_collection(client, old_name, new_name)
         # 2. insert
         rng = np.random.default_rng(seed=19530)
-        rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
-                 default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
+        rows = [{pk_name: str(i),
+                 default_vector_field_name: list(rng.random((1, default_dim))[0]),
+                 default_string_field_name: str(i),
+                 default_float_field_name: i*1.0
+                 } for i in range(default_nb)]
         self.insert(client, new_name, rows)
         self.flush(client, new_name)
+        index_params = self.prepare_index_params(client)[0]
+        index_params.add_index(default_vector_field_name, metric_type="COSINE")
+        self.create_index(client, new_name, index_params=index_params)
+        self.load_collection(client, new_name)
         # assert self.num_entities(client, collection_name)[0] == default_nb
         # 3. search
         vectors_to_search = rng.random((1, default_dim))
-        insert_ids = [i for i in range(default_nb)]
+        insert_ids = [str(i) for i in range(default_nb)]
         self.search(client, new_name, vectors_to_search,
                     check_task=CheckTasks.check_search_results,
                     check_items={"enable_milvus_client_api": True,
                                  "nq": len(vectors_to_search),
-                                 "ids": insert_ids,
+                                 "ids": insert_ids, "primary_field": pk_name,
                                  "limit": default_limit})
         # 4. query
-        self.query(client, new_name, filter=default_search_exp,
+        filter = f"{default_float_field_name} >= 0"
+        self.query(client, new_name, filter=filter,
                    check_task=CheckTasks.check_query_results,
                    check_items={exp_res: rows, "with_vec": True,
-                                "primary_field": default_primary_key_field_name})
+                                "primary_field": pk_name})
         self.release_collection(client, new_name)
         self.drop_collection(client, new_name)
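NOTE: the `primary_field` key added to check_items above exists because, with the pymilvus version bumped below, MilvusClient search hits carry the primary key under its real field name rather than a hard-coded "id" (see the `ids.append(hit[pk_name])` change in func_check.py). A rough sketch of the access pattern; the uri, collection name, and vector dim are placeholders:

    # Python sketch, not part of the patch.
    from pymilvus import MilvusClient

    client = MilvusClient(uri="http://localhost:19530")
    res = client.search("my_collection", data=[[0.1] * 8], limit=3)
    for hits in res:
        ids = [hit["pk_varchar"] for hit in hits]  # keyed by pk name, not "id"
        print(ids)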
diff --git a/tests/python_client/requirements.txt b/tests/python_client/requirements.txt
index 19ac58dd1f0ae..5e7ca5433393b 100644
--- a/tests/python_client/requirements.txt
+++ b/tests/python_client/requirements.txt
@@ -28,8 +28,8 @@ pytest-parallel
 pytest-random-order
 
 # pymilvus
-pymilvus==2.5.3
-pymilvus[bulk_writer]==2.5.3
+pymilvus==2.5.5rc5
+pymilvus[bulk_writer]==2.5.5rc5
 
 # for customize config test
 python-benedict==0.24.3
diff --git a/tests/python_client/testcases/async_milvus_client/test_e2e_async.py b/tests/python_client/testcases/async_milvus_client/test_e2e_async.py
index e5fb3eed1a5e2..e46859b9008a6 100644
--- a/tests/python_client/testcases/async_milvus_client/test_e2e_async.py
+++ b/tests/python_client/testcases/async_milvus_client/test_e2e_async.py
@@ -281,6 +281,7 @@ async def test_async_client_with_schema(self, schema):
                                                  "params": {"ef": "96"}},
                                   check_task=CheckTasks.check_search_results,
                                   check_items={"enable_milvus_client_api": True,
+                                               "primary_field": ct.default_string_field_name,
                                                "nq": ct.default_nq,
                                                "limit": ct.default_limit})
         tasks.append(default_search_task)
@@ -307,6 +308,7 @@ async def test_async_client_with_schema(self, schema):
                                          check_task=CheckTasks.check_search_results,
                                          check_items={
                                              "enable_milvus_client_api": True,
+                                             "primary_field": ct.default_string_field_name,
                                              "nq": ct.default_nq,
                                              "limit": 5})
         tasks.append(filter_params_search_task)
diff --git a/tests/python_client/testcases/test_full_text_search.py b/tests/python_client/testcases/test_full_text_search.py
index 6a82ea09c7f8c..9160071890912 100644
--- a/tests/python_client/testcases/test_full_text_search.py
+++ b/tests/python_client/testcases/test_full_text_search.py
@@ -2315,9 +2315,10 @@ def test_full_text_search_default(
     @pytest.mark.parametrize("expr", ["text_match"])
     @pytest.mark.parametrize("offset", [10])
     @pytest.mark.parametrize("tokenizer", ["jieba"])
+    @pytest.mark.parametrize("inverted_index_algo", ct.inverted_index_algo)
     def test_full_text_search_with_jieba_tokenizer(
-        self, offset, tokenizer, expr, enable_inverted_index, enable_partition_key, empty_percent, index_type, nq
-    ):
+            self, offset, tokenizer, expr, enable_inverted_index, enable_partition_key,
+            empty_percent, index_type, nq, inverted_index_algo):
         """
         target: test full text search
         method: 1. enable full text search with jieba tokenizer and insert data with varchar
@@ -2430,6 +2431,7 @@ def test_full_text_search_with_jieba_tokenizer(
                 "params": {
                     "bm25_k1": 1.5,
                     "bm25_b": 0.75,
+                    "inverted_index_algo": inverted_index_algo
                 }
             }
         )
@@ -3302,8 +3304,9 @@ class TestHybridSearchWithFullTextSearch(TestcaseBase):
     @pytest.mark.parametrize("enable_inverted_index", [True])
     @pytest.mark.parametrize("index_type", ["SPARSE_INVERTED_INDEX"])
     @pytest.mark.parametrize("tokenizer", ["standard"])
+    @pytest.mark.parametrize("inverted_index_algo", ct.inverted_index_algo)
     def test_hybrid_search_with_full_text_search(
-        self, tokenizer, enable_inverted_index, enable_partition_key, empty_percent, index_type
+        self, tokenizer, enable_inverted_index, enable_partition_key, empty_percent, index_type, inverted_index_algo
     ):
         """
         target: test full text search
@@ -3403,6 +3406,7 @@ def test_hybrid_search_with_full_text_search(
                 "params": {
                     "bm25_k1": 1.5,
                     "bm25_b": 0.75,
+                    "inverted_index_algo": inverted_index_algo
                 }
             }
         )
diff --git a/tests/python_client/testcases/test_index.py b/tests/python_client/testcases/test_index.py
index 66afe37830e8f..cdb9a3f5dc264 100644
--- a/tests/python_client/testcases/test_index.py
+++ b/tests/python_client/testcases/test_index.py
@@ -1484,6 +1484,28 @@ def test_invalid_sparse_ratio(self, ratio, index):
                                     check_task=CheckTasks.err_res,
                                     check_items=error)
 
+    @pytest.mark.tags(CaseLabel.L2)
+    @pytest.mark.parametrize("inverted_index_algo", ["INVALID_ALGO"])
+    @pytest.mark.parametrize("index", ct.all_index_types[9:11])
+    def test_invalid_sparse_inverted_index_algo(self, inverted_index_algo, index):
+        """
+        target: index creation with an unsupported inverted_index_algo value
+        method: build a sparse index with an invalid inverted_index_algo
+        expected: raise exception
+        """
+        c_name = cf.gen_unique_str(prefix)
+        schema = cf.gen_default_sparse_schema()
+        collection_w = self.init_collection_wrap(name=c_name, schema=schema)
+        data = cf.gen_default_list_sparse_data()
+        collection_w.insert(data=data)
+        params = {"index_type": index, "metric_type": "IP", "params": {"inverted_index_algo": inverted_index_algo}}
+        error = {ct.err_code: 999,
+                 ct.err_msg: f"sparse inverted index algo {inverted_index_algo} not found or not supported, "
+                             f"supported: [TAAT_NAIVE DAAT_WAND DAAT_MAXSCORE]"}
+        index, _ = self.index_wrap.init_index(collection_w.collection, ct.default_sparse_vec_field_name, params,
+                                              check_task=CheckTasks.err_res,
+                                              check_items=error)
+
 
 @pytest.mark.tags(CaseLabel.GPU)
 class TestNewIndexAsync(TestcaseBase):
diff --git a/tests/python_client/testcases/test_search.py b/tests/python_client/testcases/test_search.py
index 2a57bc7299483..595663d83594b 100644
--- a/tests/python_client/testcases/test_search.py
+++ b/tests/python_client/testcases/test_search.py
@@ -3104,24 +3104,23 @@ def test_search_with_expression(self, null_data_percent):
             assert set(ids).issubset(filter_ids_set)
 
         # 5. search again with expression template and search hints
-        if expr != "":  # TODO: remove this when issue #39013 is fixed
-            search_param = default_search_params.copy()
-            search_param.update({"hints": "iterative_filter"})
-            search_res, _ = collection_w.search(vectors[:default_nq], default_search_field,
-                                                search_param, nb,
-                                                expr=expr, expr_params=expr_params, _async=_async,
-                                                check_task=CheckTasks.check_search_results,
-                                                check_items={"nq": default_nq,
-                                                             "ids": insert_ids,
-                                                             "limit": min(nb, len(filter_ids)),
-                                                             "_async": _async})
-            if _async:
-                search_res.done()
-                search_res = search_res.result()
-            filter_ids_set = set(filter_ids)
-            for hits in search_res:
-                ids = hits.ids
-                assert set(ids).issubset(filter_ids_set)
+        search_param = default_search_params.copy()
+        search_param.update({"hints": "iterative_filter"})
+        search_res, _ = collection_w.search(vectors[:default_nq], default_search_field,
+                                            search_param, nb,
+                                            expr=expr, expr_params=expr_params, _async=_async,
+                                            check_task=CheckTasks.check_search_results,
+                                            check_items={"nq": default_nq,
+                                                         "ids": insert_ids,
+                                                         "limit": min(nb, len(filter_ids)),
+                                                         "_async": _async})
+        if _async:
+            search_res.done()
+            search_res = search_res.result()
+        filter_ids_set = set(filter_ids)
+        for hits in search_res:
+            ids = hits.ids
+            assert set(ids).issubset(filter_ids_set)
 
     @pytest.mark.tags(CaseLabel.L2)
     @pytest.mark.parametrize("bool_type", [True, False, "true", "false"])
@@ -12860,7 +12859,8 @@ class TestSparseSearch(TestcaseBase):
 
     @pytest.mark.tags(CaseLabel.L1)
     @pytest.mark.parametrize("index", ct.all_index_types[9:11])
-    def test_sparse_index_search(self, index):
+    @pytest.mark.parametrize("inverted_index_algo", ct.inverted_index_algo)
+    def test_sparse_index_search(self, index, inverted_index_algo):
         """
         target: verify that sparse index for sparse vectors can be searched properly
         method: create connection, collection, insert and search
@@ -12873,12 +12873,16 @@ def test_sparse_index_search(self, index, inverted_index_algo):
         data = cf.gen_default_list_sparse_data(nb=3000)
         collection_w.insert(data)
         params = cf.get_index_params_params(index)
+        params.update({"inverted_index_algo": inverted_index_algo})
         index_params = {"index_type": index, "metric_type": "IP", "params": params}
         collection_w.create_index(ct.default_sparse_vec_field_name, index_params, index_name=index)
 
         collection_w.load()
+        _params = cf.get_search_params_params(index)
+        _params.update({"dim_max_score_ratio": 1.05})
+        search_params = {"params": _params}
         collection_w.search(data[-1][0:default_nq], ct.default_sparse_vec_field_name,
-                            ct.default_sparse_search_params, default_limit,
+                            search_params, default_limit,
                             output_fields=[ct.default_sparse_vec_field_name],
                             check_task=CheckTasks.check_search_results,
                             check_items={"nq": default_nq,
@@ -12887,7 +12891,7 @@ def test_sparse_index_search(self, index, inverted_index_algo):
                                          "output_fields": [ct.default_sparse_vec_field_name]})
         expr = "int64 < 100 "
         collection_w.search(data[-1][0:default_nq], ct.default_sparse_vec_field_name,
-                            ct.default_sparse_search_params, default_limit,
+                            search_params, default_limit,
                             expr=expr, output_fields=[ct.default_sparse_vec_field_name],
                             check_task=CheckTasks.check_search_results,
                             check_items={"nq": default_nq,
@@ -12923,7 +12927,8 @@ def test_sparse_index_dim(self, index, dim):
 
     @pytest.mark.tags(CaseLabel.L2)
     @pytest.mark.parametrize("index", ct.all_index_types[9:11])
-    def test_sparse_index_enable_mmap_search(self, index):
+    @pytest.mark.parametrize("inverted_index_algo", ct.inverted_index_algo)
+    def test_sparse_index_enable_mmap_search(self, index, inverted_index_algo):
         """
         target: verify that the sparse indexes of sparse vectors can be searched properly after turning on mmap
         method: create connection, collection, enable mmap, insert and search
@@ -12939,6 +12944,7 @@ def test_sparse_index_enable_mmap_search(self, index, inverted_index_algo):
         collection_w.insert(data)
 
         params = cf.get_index_params_params(index)
+        params.update({"inverted_index_algo": inverted_index_algo})
         index_params = {"index_type": index, "metric_type": "IP", "params": params}
         collection_w.create_index(ct.default_sparse_vec_field_name, index_params,
                                   index_name=index)
@@ -12968,9 +12974,9 @@ def test_sparse_index_enable_mmap_search(self, index, inverted_index_algo):
         assert len(res) == 4
 
     @pytest.mark.tags(CaseLabel.L1)
-    @pytest.mark.parametrize("ratio", [0.01, 0.1, 0.5, 0.9])
+    @pytest.mark.parametrize("drop_ratio_build", [0.01])
     @pytest.mark.parametrize("index", ct.all_index_types[9:11])
-    def test_search_sparse_ratio(self, ratio, index):
+    def test_search_sparse_ratio(self, drop_ratio_build, index):
         """
         target: create a sparse index by adjusting the ratio parameter.
         method: create a sparse index by adjusting the ratio parameter.
@@ -12982,16 +12988,28 @@ def test_search_sparse_ratio(self, drop_ratio_build, index):
         collection_w = self.init_collection_wrap(c_name, schema=schema)
         data = cf.gen_default_list_sparse_data(nb=4000)
         collection_w.insert(data)
-        params = {"index_type": index, "metric_type": "IP", "params": {"drop_ratio_build": ratio}}
+        params = {"index_type": index, "metric_type": "IP", "params": {"drop_ratio_build": drop_ratio_build}}
         collection_w.create_index(ct.default_sparse_vec_field_name, params, index_name=index)
         collection_w.load()
         assert collection_w.has_index(index_name=index)[0] is True
-        search_params = {"metric_type": "IP", "params": {"drop_ratio_search": ratio}}
-        collection_w.search(data[-1][0:default_nq], ct.default_sparse_vec_field_name,
-                            search_params, default_limit,
-                            check_task=CheckTasks.check_search_results,
-                            check_items={"nq": default_nq,
-                                         "limit": default_limit})
+        _params = {"drop_ratio_search": 0.2}
+        for dim_max_score_ratio in [0.5, 0.99, 1, 1.3]:
+            _params.update({"dim_max_score_ratio": dim_max_score_ratio})
+            search_params = {"metric_type": "IP", "params": _params}
+            collection_w.search(data[-1][0:default_nq], ct.default_sparse_vec_field_name,
+                                search_params, default_limit,
+                                check_task=CheckTasks.check_search_results,
+                                check_items={"nq": default_nq,
+                                             "limit": default_limit})
+        error = {ct.err_code: 999,
+                 ct.err_msg: "should be in range [0.500000, 1.300000]"}
+        for invalid_ratio in [0.49, 1.4]:
+            _params.update({"dim_max_score_ratio": invalid_ratio})
+            search_params = {"metric_type": "IP", "params": _params}
+            collection_w.search(data[-1][0:default_nq], ct.default_sparse_vec_field_name,
+                                search_params, default_limit,
+                                check_task=CheckTasks.err_res,
+                                check_items=error)
 
     @pytest.mark.tags(CaseLabel.L2)
     @pytest.mark.parametrize("index", ct.all_index_types[9:11])
@@ -13024,8 +13042,8 @@ def test_sparse_vector_search_output_field(self, index):
 
     @pytest.mark.tags(CaseLabel.L2)
     @pytest.mark.parametrize("index", ct.all_index_types[9:11])
-    @pytest.mark.xfail(reason="issue #36174")
-    def test_sparse_vector_search_iterator(self, index):
+    @pytest.mark.parametrize("inverted_index_algo", ct.inverted_index_algo)
+    def test_sparse_vector_search_iterator(self, index, inverted_index_algo):
         """
         target: create sparse vectors and search iterator
         method: create sparse vectors and search iterator
@@ -13038,6 +13056,7 @@ def test_sparse_vector_search_iterator(self, index, inverted_index_algo):
         data = cf.gen_default_list_sparse_data(nb=4000)
         collection_w.insert(data)
         params = cf.get_index_params_params(index)
+        params.update({"inverted_index_algo": inverted_index_algo})
         index_params = {"index_type": index, "metric_type": "IP", "params": params}
         collection_w.create_index(ct.default_sparse_vec_field_name, index_params,
                                   index_name=index)
diff --git a/tests/python_client/utils/util_pymilvus.py b/tests/python_client/utils/util_pymilvus.py
index 44a7533c79840..ac19798434248 100644
--- a/tests/python_client/utils/util_pymilvus.py
+++ b/tests/python_client/utils/util_pymilvus.py
@@ -8,7 +8,7 @@
 import numpy as np
 import requests
 from sklearn import preprocessing
-from pymilvus import Milvus, DataType
+from pymilvus import MilvusClient, DataType
 from utils.util_log import test_log as log
 from utils.util_k8s import init_k8s_client_config
 
@@ -115,9 +115,9 @@ def get_milvus(host, port, uri=None, handler=None, **kwargs):
         handler = "GRPC"
     try_connect = kwargs.get("try_connect", True)
     if uri is not None:
-        milvus = Milvus(uri=uri, handler=handler, try_connect=try_connect)
+        milvus = MilvusClient(uri=uri, handler=handler, try_connect=try_connect)
     else:
-        milvus = Milvus(host=host, port=port, handler=handler, try_connect=try_connect)
+        milvus = MilvusClient(uri=f"http://{host}:{port}", handler=handler, try_connect=try_connect)
     return milvus
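NOTE: for reference, the new search-time knob exercised in test_search_sparse_ratio rides alongside drop_ratio_search, and the asserted error message pins its valid range to [0.5, 1.3]. A condensed sketch; the field name and limit are illustrative, not from this patch:

    # Python sketch, not part of the patch.
    from pymilvus import Collection

    def sparse_search(coll: Collection, queries: list):
        search_params = {
            "metric_type": "IP",
            "params": {
                "drop_ratio_search": 0.2,
                "dim_max_score_ratio": 1.05,  # must be within [0.5, 1.3]
            },
        }
        return coll.search(data=queries, anns_field="sparse_vector",
                           param=search_params, limit=10)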