Merge pull request #39 from grace-sng7/test_tuebingen_validation

amit-sharma · web-flow · commit 9184e56475af · 2025-04-02T16:24:11.000+05:30
Test classes for suggesters #2
diff --git a/pywhyllm/suggesters/tuebingen_model_suggester.py b/pywhyllm/suggesters/tuebingen_model_suggester.py
@@ -17,11 +17,11 @@ class Strategy(Enum):
 
 
 class TuebingenModelSuggester(ModelSuggester):
-    def __init__(self, llm):
+    def __init__(self, llm=None):
         super().__init__(llm)
 
     def suggest_description(
-            self, variable, context=None, ask_reference=False
+            self, variable, ask_reference=False
     ):
         generate_description = self._build_description_program(variable)
 
@@ -255,11 +255,11 @@ def _build_relationship_program(
                         the answer within the tags, <answer>Yes/No</answer>, and the most influential reference within 
                         the tags <reference>Author, Title, Year of publication</reference>.
                         \n\n\n----------------\n\n\n<answer>Yes</answer>\n<reference>Author, Title, Year of 
-                        publication</reference>\n\n\n----------------\n\n\n<answer>No</answer> {{~/user}}"""
+                        publication</reference>\n\n\n----------------\n\n\n<answer>No</answer>"""
                 else:
                     query["user"] += """When consensus is reached, thinking carefully and factually, explain the council's answer. 
                     Provide the answer within the tags, <answer>Yes/No</answer>.
-                        \n\n\n----------------\n\n\n<answer>Yes</answer>\n\n\n----------------\n\n\n<answer>No</answer> {{~/user}}"""
+                        \n\n\n----------------\n\n\n<answer>Yes</answer>\n\n\n----------------\n\n\n<answer>No</answer>"""
 
             elif use_strategy == Strategy.CoT:
                 if use_description:
diff --git a/pywhyllm/suggesters/validation_suggester.py b/pywhyllm/suggesters/validation_suggester.py
diff --git a/pywhyllm/tests/model_suggester/data_providers/tuebingen_model_suggester_data_provider.py b/pywhyllm/tests/model_suggester/data_providers/tuebingen_model_suggester_data_provider.py
@@ -0,0 +1,62 @@
+# TESTS: input values that test_tuebingen_model_suggester.py passes to TuebingenModelSuggester methods
+variable = "water"  # single concept used by the description tests
+variable_a = "water intake"  # first variable of the pair used by the relationship tests
+description_a = "the amount of water a person drinks per day"
+variable_b = "hydration level"  # second variable of the pair used by the relationship tests
+description_b = "the level of hydration in the body"
+domain = "biology"  # shows up in the expected system prompt ("causal reasoning and biology")
+
+# MOCK_RESPONSES: canned reply strings the tests install as the return value of the mocked LLM's __getitem__
+test_suggest_description_expected_response = "<description>Water is a transparent, tasteless, odorless, nearly colorless liquid that is essential for all life forms and covers approximately 71% of Earth's surface, also existing in solid (ice) and gas (vapor) states.</description>"
+test_suggest_onesided_relationship_a_cause_b_expected_response = "<answer>A</answer>"  # paired with expected result 1
+test_suggest_onesided_relationship_a_not_cause_b_expected_response = "<answer>B</answer>"  # paired with expected result 0
+test_suggest_relationship_a_cause_b_expected_response = "<answer>Yes</answer> <reference>Popkin, Barry M., Kristen E. D\'Anci, and Irwin H. Rosenberg. \"Water, hydration and health.\" Nutrition reviews 68.8 (2010): 439-458.</reference>"
+test_suggest_relationship_a_not_cause_b_expected_response = "<answer>No</answer> <reference>Popkin, Barry M., Kristen E. D\'Anci, and Irwin H. Rosenberg. \"Water, hydration and health.\" Nutrition reviews 68.8 (2010): 439-458.</reference>"
+
+# ASSERTIONS: expected values the tests compare (with ==) against what the suggester methods return
+test_suggest_description_expected_result = ([
+                                                "Water is a transparent, tasteless, odorless, nearly colorless liquid that is essential for all life forms and covers approximately 71% of Earth's surface, also existing in solid (ice) and gas (vapor) states."],
+                                            [])
+test_suggest_onesided_relationship_a_cause_b_expected_result = 1  # expected for the mocked "<answer>A</answer>" reply
+test_suggest_onesided_relationship_a_not_cause_b_expected_result = 0  # expected for the mocked "<answer>B</answer>" reply
+test__build_description_program_no_context_no_reference_expected_result = {
+    'system': 'You are a helpful assistant for writing concise and peer-reviewed descriptions. Your goal \n            is to provide factual and succinct description of the given concept.',
+    'user': " Describe the concept of water.\n                    In one sentence, provide a factual and succinct description of water\n                        Let's think step-by-step to make sure that we have a proper and clear description. Then provide \n                        your final answer within the tags, <description></description>."}
+test__build_description_program_no_context_with_reference_expected_result = {  # NOTE(review): 'user' has a stray '"' right after "description of water" - presumably mirrors the prompt builder; confirm
+    'system': 'You are a helpful assistant for writing concise and peer-reviewed descriptions. Your goal \n            is to provide factual and succinct description of the given concept.',
+    'user': ' Describe the concept of water.\n                    In one sentence, provide a factual and succinct description of water"\n                        Then provide two research papers that support your description.\n                        Let\'s think step-by-step to make sure that we have a proper and clear description. Then provide \n                        your final answer within the tags, <description></description>, and each research paper within the \n                        tags <paper></paper>.'}
+test__build_description_program_with_context_with_reference_expected_result = {  # NOTE(review): 'user' runs "waterThen" together with no separator - presumably mirrors the prompt builder; confirm
+    'system': 'You are a helpful assistant for writing concise and peer-reviewed descriptions. Your goal is \n            to provide factual and succinct descriptions related to the given concept and context.',
+    'user': "Using this context about the particular variable, describe the concept of water.\n            In one sentence, provide a factual and succinct description of waterThen provide two research papers that support your description.\n                Let's think step-by-step to make sure that we have a proper and clear description. Then provide your final \n                answer within the tags, <description></description>, and each research paper within the tags <reference></reference>."}
+test__build_description_program_with_context_no_reference_expected_result = {
+    'system': 'You are a helpful assistant for writing concise and peer-reviewed descriptions. Your goal is \n            to provide factual and succinct descriptions related to the given concept and context.',
+    'user': "Using this context about the particular variable, describe the concept of water.\n            In one sentence, provide a factual and succinct description of water\n                    Let's think step-by-step to make sure that we have a proper and clear description. Then provide your final \n                    answer within the tags, <description></description>."}
+test_suggest_relationship_a_cause_b_expected_result = (1,
+                                                       [
+                                                           'Popkin, Barry M., Kristen E. D\'Anci, and Irwin H. Rosenberg. "Water, hydration and health." Nutrition reviews 68.8 (2010): 439-458.'])
+test_suggest_relationship_a_not_cause_b_expected_result = (0,
+                                                           [
+                                                               'Popkin, Barry M., Kristen E. D\'Anci, and Irwin H. Rosenberg. "Water, hydration and health." Nutrition reviews 68.8 (2010): 439-458.'])
+test__build_relationship_program_expected_result = {  # prompt expected for use_description=False, ask_reference=False
+    'system': 'You are a helpful assistant on causal reasoning and biology. Your '
+              'goal is to answer \n'
+              '            questions about cause and effect in a factual and '
+              'concise way.',
+    'user': 'can changing water intake change hydration level? Answer Yes or '
+            'No.When consensus is reached, thinking carefully and factually, '
+            "explain the council's answer. \n"
+            '                    Provide the answer within the tags, '
+            '<answer>Yes/No</answer>.\n'
+            '                        \n'
+            '\n'
+            '\n'
+            '----------------\n'
+            '\n'
+            '\n'
+            '<answer>Yes</answer>\n'
+            '\n'
+            '\n'
+            '----------------\n'
+            '\n'
+            '\n'
+            '<answer>No</answer>'}
diff --git a/pywhyllm/tests/model_suggester/data_providers/validation_suggester_data_provider.py b/pywhyllm/tests/model_suggester/data_providers/validation_suggester_data_provider.py
@@ -0,0 +1,61 @@
+# TESTS: input fixtures (presumably consumed by test_validation_suggester.py, whose body is not shown here - confirm)
+test_vars = ["smoking", "lung cancer", "exercise habits", "air pollution exposure"]  # four variable names
+domain_expertises = ['Epidemiology']
+
+# MOCK RESPONSES: canned reply strings using XML-style tags; list-valued entries hold several replies (presumably one per LLM call - confirm against the tests)
+test_latent_confounders_expected_response = "<confounding_factor>socio-economic status</confounding_factor> <confounding_factor>mental health</confounding_factor>"
+test_negative_controls_expected_response = "<negative_control>exercise habits</negative_control>"
+test_parent_critique_expected_response = "None"
+test_children_critique_expected_response = "<influenced_factor>lung cancer</influenced_factor>"
+test_pairwise_critique_expected_response = "The answer is <answer>A</answer>"
+test_critique_graph_parent_expected_response = ["None",  # four replies, same count as test_vars
+                                                "<influencing_factor>smoking</influencing_factor> <influencing_factor>air pollution exposure</influencing_factor>",
+                                                "<influencing_factor>air pollution exposure</influencing_factor>",
+                                                "None"]
+test_critique_graph_children_expected_response = ["<influenced_factor>lung cancer</influenced_factor>",  # four replies, same count as test_vars
+                                                  "<influenced_factor>exercise habits</influenced_factor>",
+                                                  "<influenced_factor>lung cancer</influenced_factor>",
+                                                  "<influenced_factor>lung cancer</influenced_factor> <influenced_factor>exercise habits</influenced_factor>"]
+test_critique_graph_pairwise_expected_response = ["<answer>A</answer>", "<answer>A</answer>", "<answer>C</answer>",  # six replies (presumably one per unordered pair of the four test_vars - confirm)
+                                                  "<answer>B</answer>", "<answer>B</answer>", "<answer>B</answer>"]
+
+# ASSERTIONS: expected values (presumably compared against validation suggester results - the test file body is not shown here)
+test_suggest_latent_confounders_expected_results = ({'mental health': 1, 'socio-economic status': 1},
+                                                    [{'mental health': 1, 'socio-economic status': 1},
+                                                     ['socio-economic status', 'mental health']])
+test_request_latent_confounders_expected_results = ({'mental health': 1, 'socio-economic status': 1},
+                                                    ['socio-economic status', 'mental health'])
+test_suggest_negative_controls_expected_results = (
+{'exercise habits': 1}, [{'exercise habits': 1}, ['exercise habits']])
+test_request_negative_controls_expected_results = ({'exercise habits': 1}, ['exercise habits'])
+test_parent_critique_expected_results = []
+test_children_critique_expected_results = ['lung cancer']
+test_pairwise_critique_expected_results = ('smoking', 'lung cancer')
+test_critique_graph_parent_expected_results = ({('air pollution exposure', 'exercise habits'): 1,  # pair of dicts keyed by 2-tuples of variable names
+                                                ('air pollution exposure', 'lung cancer'): 1,
+                                                ('air pollution exposure', 'smoking'): 1,
+                                                ('smoking', 'lung cancer'): 1},
+                                               {('air pollution exposure', 'exercise habits'): 1,
+                                                ('air pollution exposure', 'lung cancer'): 1,
+                                                ('smoking', 'lung cancer'): 1})
+test_critique_graph_children_expected_results = ({('air pollution exposure', 'smoking'): 1,  # pair of dicts keyed by 2-tuples of variable names
+                                                  ('exercise habits', 'air pollution exposure'): 1,
+                                                  ('exercise habits', 'smoking'): 1,
+                                                  ('lung cancer', 'air pollution exposure'): 1,
+                                                  ('lung cancer', 'exercise habits'): 1,
+                                                  ('lung cancer', 'smoking'): 1},
+                                                 {('exercise habits', 'air pollution exposure'): 1,
+                                                  ('exercise habits', 'lung cancer'): 1,
+                                                  ('lung cancer', 'air pollution exposure'): 1,
+                                                  ('lung cancer', 'exercise habits'): 1,
+                                                  ('lung cancer', 'smoking'): 1})
+test_critique_graph_pairwise_expected_results = ({('air pollution exposure', 'exercise habits'): 1,  # pair of dicts keyed by 2-tuples of variable names
+                                                  ('exercise habits', 'lung cancer'): 1,
+                                                  ('smoking', 'air pollution exposure'): 1,
+                                                  ('smoking', 'exercise habits'): 1,
+                                                  ('smoking', 'lung cancer'): 1},
+                                                 {('smoking', 'lung cancer'): 1,
+                                                  ('smoking', 'exercise habits'): 1,
+                                                  ('exercise habits', 'lung cancer'): 1,
+                                                  ('air pollution exposure', 'lung cancer'): 1,
+                                                  ('air pollution exposure', 'exercise habits'): 1})
diff --git a/pywhyllm/tests/model_suggester/test_identification_suggester.py b/pywhyllm/tests/model_suggester/test_identification_suggester.py
@@ -3,13 +3,20 @@
 from guidance.models._openai import OpenAI
 
 from pywhyllm.suggesters.identification_suggester import IdentificationSuggester
+from pywhyllm.suggesters.model_suggester import ModelSuggester
 from pywhyllm.tests.model_suggester.data_providers.model_suggester_data_provider import *
 from pywhyllm.tests.model_suggester.data_providers.identification_suggester_data_provider import *
-from pywhyllm.tests.model_suggester.test_model_suggester import TestModelSuggester
 
 class TestIdentificationSuggester(unittest.TestCase):
     def test_suggest_backdoor(self):
-        return TestModelSuggester().test_suggest_confounders()
+        modeler = IdentificationSuggester()
+        mock_llm = MagicMock(spec=OpenAI)
+        modeler.llm = mock_llm
+        mock_model_suggester = MagicMock(spec=ModelSuggester)
+        modeler.model_suggester = mock_model_suggester
+        mock_model_suggester.suggest_confounders = MagicMock(return_value=test_suggest_confounders_expected_results)
+        result = modeler.suggest_backdoor(test_vars[0], test_vars[1], test_vars, test_domain_expertises_expected_result)
+        assert result == test_suggest_confounders_expected_results
 
     def test_suggest_mediators(self):
         modeler = IdentificationSuggester()
diff --git a/pywhyllm/tests/model_suggester/test_tuebingen_model_suggester.py b/pywhyllm/tests/model_suggester/test_tuebingen_model_suggester.py
@@ -0,0 +1,77 @@
+import unittest
+from unittest.mock import MagicMock
+from guidance.models._openai import OpenAI
+
+from pywhyllm.suggesters.tuebingen_model_suggester import TuebingenModelSuggester, Strategy
+from pywhyllm.tests.model_suggester.data_providers.tuebingen_model_suggester_data_provider import *
+
+
+class TestTuebingenModelSuggester(unittest.TestCase):
+    """Tests for TuebingenModelSuggester that swap its llm for a MagicMock(spec=OpenAI) returning canned replies."""
+
+    def test_suggest_description(self):
+        """Feed a canned <description> reply through suggest_description and compare with the stored result."""
+        llm_stub = MagicMock(spec=OpenAI)
+        llm_stub.__add__ = MagicMock(return_value=llm_stub)
+        llm_stub.__getitem__ = MagicMock(return_value=test_suggest_description_expected_response)
+        suggester = TuebingenModelSuggester()
+        suggester.llm = llm_stub
+        assert suggester.suggest_description(variable, True) == test_suggest_description_expected_result
+
+    def test_suggest_onesided_relationship(self):
+        """Run suggest_onesided_relationship once per canned reply and compare with the stored result."""
+        llm_stub = MagicMock(spec=OpenAI)
+        llm_stub.__add__ = MagicMock(return_value=llm_stub)
+        suggester = TuebingenModelSuggester()
+        suggester.llm = llm_stub
+        # Index 0: variable a causes variable b; index 1: variable a does not cause variable b.
+        replies = (test_suggest_onesided_relationship_a_cause_b_expected_response,
+                   test_suggest_onesided_relationship_a_not_cause_b_expected_response)
+        outcomes = (test_suggest_onesided_relationship_a_cause_b_expected_result,
+                    test_suggest_onesided_relationship_a_not_cause_b_expected_result)
+        for canned_reply, expected in zip(replies, outcomes):
+            llm_stub.__getitem__ = MagicMock(return_value=canned_reply)
+            result = suggester.suggest_onesided_relationship(variable_a, description_a, variable_b, description_b)
+            assert result == expected
+
+    def test__build_description_program(self):
+        """Build the description prompt for every context/reference flag pair and compare with the stored dicts."""
+        suggester = TuebingenModelSuggester()
+        suggester.llm = MagicMock(spec=OpenAI)
+        expected_prompts = {
+            # no context, no reference
+            (False, False): test__build_description_program_no_context_no_reference_expected_result,
+            # no context, with reference
+            (False, True): test__build_description_program_no_context_with_reference_expected_result,
+            # with context, no reference
+            (True, False): test__build_description_program_with_context_no_reference_expected_result,
+            # with context, with reference
+            (True, True): test__build_description_program_with_context_with_reference_expected_result,
+        }
+        for flags, expected in expected_prompts.items():
+            assert suggester._build_description_program(variable, *flags) == expected
+
+    def test_suggest_relationship(self):
+        """Check suggest_relationship output for canned Yes and No replies that each carry a reference."""
+        llm_stub = MagicMock(spec=OpenAI)
+        llm_stub.__add__ = MagicMock(return_value=llm_stub)
+        suggester = TuebingenModelSuggester()
+        suggester.llm = llm_stub
+        # NOTE(review): positional order here is (a, b, desc_a, desc_b), unlike the other tests - confirm signature.
+        call_args = (variable_a, variable_b, description_a, description_b, domain)
+        call_kwargs = {"strategy": Strategy.ToT_Single, "ask_reference": True}
+        replies = (test_suggest_relationship_a_cause_b_expected_response,  # a causes b
+                   test_suggest_relationship_a_not_cause_b_expected_response)  # a does not cause b
+        outcomes = (test_suggest_relationship_a_cause_b_expected_result,
+                    test_suggest_relationship_a_not_cause_b_expected_result)
+        for canned_reply, expected in zip(replies, outcomes):
+            llm_stub.__getitem__ = MagicMock(return_value=canned_reply)
+            assert suggester.suggest_relationship(*call_args, **call_kwargs) == expected
+
+    def test__build_relationship_program(self):
+        """Build the relationship prompt without descriptions or references and compare with the stored dict."""
+        suggester = TuebingenModelSuggester()
+        suggester.llm = MagicMock(spec=OpenAI)
+        built_prompt = suggester._build_relationship_program(variable_a, description_a, variable_b, description_b,
+                                                             domain, use_description=False, ask_reference=False)
+        assert built_prompt == test__build_relationship_program_expected_result
diff --git a/pywhyllm/tests/model_suggester/test_validation_suggester.py b/pywhyllm/tests/model_suggester/test_validation_suggester.py