Skip to content

Commit 9184e56

Browse files
authored
Merge pull request #39 from grace-sng7/test_tuebingen_validation
Test classes for suggesters #2
2 parents 90133c8 + 0b26828 commit 9184e56

7 files changed

+450
-214
lines changed

pywhyllm/suggesters/tuebingen_model_suggester.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,11 @@ class Strategy(Enum):
1717

1818

1919
class TuebingenModelSuggester(ModelSuggester):
20-
def __init__(self, llm):
20+
def __init__(self, llm=None):
2121
super().__init__(llm)
2222

2323
def suggest_description(
24-
self, variable, context=None, ask_reference=False
24+
self, variable, ask_reference=False
2525
):
2626
generate_description = self._build_description_program(variable)
2727

@@ -255,11 +255,11 @@ def _build_relationship_program(
255255
the answer within the tags, <answer>Yes/No</answer>, and the most influential reference within
256256
the tags <reference>Author, Title, Year of publication</reference>.
257257
\n\n\n----------------\n\n\n<answer>Yes</answer>\n<reference>Author, Title, Year of
258-
publication</reference>\n\n\n----------------\n\n\n<answer>No</answer> {{~/user}}"""
258+
publication</reference>\n\n\n----------------\n\n\n<answer>No</answer>"""
259259
else:
260260
query["user"] += """When consensus is reached, thinking carefully and factually, explain the council's answer.
261261
Provide the answer within the tags, <answer>Yes/No</answer>.
262-
\n\n\n----------------\n\n\n<answer>Yes</answer>\n\n\n----------------\n\n\n<answer>No</answer> {{~/user}}"""
262+
\n\n\n----------------\n\n\n<answer>Yes</answer>\n\n\n----------------\n\n\n<answer>No</answer>"""
263263

264264
elif use_strategy == Strategy.CoT:
265265
if use_description:

pywhyllm/suggesters/validation_suggester.py

Lines changed: 110 additions & 208 deletions
Large diffs are not rendered by default.
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
# TESTS
2+
variable = "water"
3+
variable_a = "water intake"
4+
description_a = "the amount of water a person drinks per day"
5+
variable_b = "hydration level"
6+
description_b = "the level of hydration in the body"
7+
domain = "biology"
8+
9+
# MOCK RESPONSES
10+
test_suggest_description_expected_response = "<description>Water is a transparent, tasteless, odorless, nearly colorless liquid that is essential for all life forms and covers approximately 71% of Earth's surface, also existing in solid (ice) and gas (vapor) states.</description>"
11+
test_suggest_onesided_relationship_a_cause_b_expected_response = "<answer>A</answer>"
12+
test_suggest_onesided_relationship_a_not_cause_b_expected_response = "<answer>B</answer>"
13+
test_suggest_relationship_a_cause_b_expected_response = "<answer>Yes</answer> <reference>Popkin, Barry M., Kristen E. D\'Anci, and Irwin H. Rosenberg. \"Water, hydration and health.\" Nutrition reviews 68.8 (2010): 439-458.</reference>"
14+
test_suggest_relationship_a_not_cause_b_expected_response = "<answer>No</answer> <reference>Popkin, Barry M., Kristen E. D\'Anci, and Irwin H. Rosenberg. \"Water, hydration and health.\" Nutrition reviews 68.8 (2010): 439-458.</reference>"
15+
16+
# ASSERTIONS
17+
test_suggest_description_expected_result = ([
18+
"Water is a transparent, tasteless, odorless, nearly colorless liquid that is essential for all life forms and covers approximately 71% of Earth's surface, also existing in solid (ice) and gas (vapor) states."],
19+
[])
20+
test_suggest_onesided_relationship_a_cause_b_expected_result = 1
21+
test_suggest_onesided_relationship_a_not_cause_b_expected_result = 0
22+
test__build_description_program_no_context_no_reference_expected_result = {
23+
'system': 'You are a helpful assistant for writing concise and peer-reviewed descriptions. Your goal \n is to provide factual and succinct description of the given concept.',
24+
'user': " Describe the concept of water.\n In one sentence, provide a factual and succinct description of water\n Let's think step-by-step to make sure that we have a proper and clear description. Then provide \n your final answer within the tags, <description></description>."}
25+
test__build_description_program_no_context_with_reference_expected_result = {
26+
'system': 'You are a helpful assistant for writing concise and peer-reviewed descriptions. Your goal \n is to provide factual and succinct description of the given concept.',
27+
'user': ' Describe the concept of water.\n In one sentence, provide a factual and succinct description of water"\n Then provide two research papers that support your description.\n Let\'s think step-by-step to make sure that we have a proper and clear description. Then provide \n your final answer within the tags, <description></description>, and each research paper within the \n tags <paper></paper>.'}
28+
test__build_description_program_with_context_with_reference_expected_result = {
29+
'system': 'You are a helpful assistant for writing concise and peer-reviewed descriptions. Your goal is \n to provide factual and succinct descriptions related to the given concept and context.',
30+
'user': "Using this context about the particular variable, describe the concept of water.\n In one sentence, provide a factual and succinct description of waterThen provide two research papers that support your description.\n Let's think step-by-step to make sure that we have a proper and clear description. Then provide your final \n answer within the tags, <description></description>, and each research paper within the tags <reference></reference>."}
31+
test__build_description_program_with_context_no_reference_expected_result = {
32+
'system': 'You are a helpful assistant for writing concise and peer-reviewed descriptions. Your goal is \n to provide factual and succinct descriptions related to the given concept and context.',
33+
'user': "Using this context about the particular variable, describe the concept of water.\n In one sentence, provide a factual and succinct description of water\n Let's think step-by-step to make sure that we have a proper and clear description. Then provide your final \n answer within the tags, <description></description>."}
34+
test_suggest_relationship_a_cause_b_expected_result = (1,
35+
[
36+
'Popkin, Barry M., Kristen E. D\'Anci, and Irwin H. Rosenberg. "Water, hydration and health." Nutrition reviews 68.8 (2010): 439-458.'])
37+
test_suggest_relationship_a_not_cause_b_expected_result = (0,
38+
[
39+
'Popkin, Barry M., Kristen E. D\'Anci, and Irwin H. Rosenberg. "Water, hydration and health." Nutrition reviews 68.8 (2010): 439-458.'])
40+
test__build_relationship_program_expected_result = {
41+
'system': 'You are a helpful assistant on causal reasoning and biology. Your '
42+
'goal is to answer \n'
43+
' questions about cause and effect in a factual and '
44+
'concise way.',
45+
'user': 'can changing water intake change hydration level? Answer Yes or '
46+
'No.When consensus is reached, thinking carefully and factually, '
47+
"explain the council's answer. \n"
48+
' Provide the answer within the tags, '
49+
'<answer>Yes/No</answer>.\n'
50+
' \n'
51+
'\n'
52+
'\n'
53+
'----------------\n'
54+
'\n'
55+
'\n'
56+
'<answer>Yes</answer>\n'
57+
'\n'
58+
'\n'
59+
'----------------\n'
60+
'\n'
61+
'\n'
62+
'<answer>No</answer>'}
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
# TESTS
2+
test_vars = ["smoking", "lung cancer", "exercise habits", "air pollution exposure"]
3+
domain_expertises = ['Epidemiology']
4+
5+
# MOCK RESPONSES
6+
test_latent_confounders_expected_response = "<confounding_factor>socio-economic status</confounding_factor> <confounding_factor>mental health</confounding_factor>"
7+
test_negative_controls_expected_response = "<negative_control>exercise habits</negative_control>"
8+
test_parent_critique_expected_response = "None"
9+
test_children_critique_expected_response = "<influenced_factor>lung cancer</influenced_factor>"
10+
test_pairwise_critique_expected_response = "The answer is <answer>A</answer>"
11+
test_critique_graph_parent_expected_response = ["None",
12+
"<influencing_factor>smoking</influencing_factor> <influencing_factor>air pollution exposure</influencing_factor>",
13+
"<influencing_factor>air pollution exposure</influencing_factor>",
14+
"None"]
15+
test_critique_graph_children_expected_response = ["<influenced_factor>lung cancer</influenced_factor>",
16+
"<influenced_factor>exercise habits</influenced_factor>",
17+
"<influenced_factor>lung cancer</influenced_factor>",
18+
"<influenced_factor>lung cancer</influenced_factor> <influenced_factor>exercise habits</influenced_factor>"]
19+
test_critique_graph_pairwise_expected_response = ["<answer>A</answer>", "<answer>A</answer>", "<answer>C</answer>",
20+
"<answer>B</answer>", "<answer>B</answer>", "<answer>B</answer>"]
21+
22+
# ASSERTIONS
23+
test_suggest_latent_confounders_expected_results = ({'mental health': 1, 'socio-economic status': 1},
24+
[{'mental health': 1, 'socio-economic status': 1},
25+
['socio-economic status', 'mental health']])
26+
test_request_latent_confounders_expected_results = ({'mental health': 1, 'socio-economic status': 1},
27+
['socio-economic status', 'mental health'])
28+
test_suggest_negative_controls_expected_results = (
29+
{'exercise habits': 1}, [{'exercise habits': 1}, ['exercise habits']])
30+
test_request_negative_controls_expected_results = ({'exercise habits': 1}, ['exercise habits'])
31+
test_parent_critique_expected_results = []
32+
test_children_critique_expected_results = ['lung cancer']
33+
test_pairwise_critique_expected_results = ('smoking', 'lung cancer')
34+
test_critique_graph_parent_expected_results = ({('air pollution exposure', 'exercise habits'): 1,
35+
('air pollution exposure', 'lung cancer'): 1,
36+
('air pollution exposure', 'smoking'): 1,
37+
('smoking', 'lung cancer'): 1},
38+
{('air pollution exposure', 'exercise habits'): 1,
39+
('air pollution exposure', 'lung cancer'): 1,
40+
('smoking', 'lung cancer'): 1})
41+
test_critique_graph_children_expected_results = ({('air pollution exposure', 'smoking'): 1,
42+
('exercise habits', 'air pollution exposure'): 1,
43+
('exercise habits', 'smoking'): 1,
44+
('lung cancer', 'air pollution exposure'): 1,
45+
('lung cancer', 'exercise habits'): 1,
46+
('lung cancer', 'smoking'): 1},
47+
{('exercise habits', 'air pollution exposure'): 1,
48+
('exercise habits', 'lung cancer'): 1,
49+
('lung cancer', 'air pollution exposure'): 1,
50+
('lung cancer', 'exercise habits'): 1,
51+
('lung cancer', 'smoking'): 1})
52+
test_critique_graph_pairwise_expected_results = ({('air pollution exposure', 'exercise habits'): 1,
53+
('exercise habits', 'lung cancer'): 1,
54+
('smoking', 'air pollution exposure'): 1,
55+
('smoking', 'exercise habits'): 1,
56+
('smoking', 'lung cancer'): 1},
57+
{('smoking', 'lung cancer'): 1,
58+
('smoking', 'exercise habits'): 1,
59+
('exercise habits', 'lung cancer'): 1,
60+
('air pollution exposure', 'lung cancer'): 1,
61+
('air pollution exposure', 'exercise habits'): 1})

pywhyllm/tests/model_suggester/test_identification_suggester.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,20 @@
33
from guidance.models._openai import OpenAI
44

55
from pywhyllm.suggesters.identification_suggester import IdentificationSuggester
6+
from pywhyllm.suggesters.model_suggester import ModelSuggester
67
from pywhyllm.tests.model_suggester.data_providers.model_suggester_data_provider import *
78
from pywhyllm.tests.model_suggester.data_providers.identification_suggester_data_provider import *
8-
from pywhyllm.tests.model_suggester.test_model_suggester import TestModelSuggester
99

1010
class TestIdentificationSuggester(unittest.TestCase):
1111
def test_suggest_backdoor(self):
12-
return TestModelSuggester().test_suggest_confounders()
12+
modeler = IdentificationSuggester()
13+
mock_llm = MagicMock(spec=OpenAI)
14+
modeler.llm = mock_llm
15+
mock_model_suggester = MagicMock(spec=ModelSuggester)
16+
modeler.model_suggester = mock_model_suggester
17+
mock_model_suggester.suggest_confounders = MagicMock(return_value=test_suggest_confounders_expected_results)
18+
result = modeler.suggest_backdoor(test_vars[0], test_vars[1], test_vars, test_domain_expertises_expected_result)
19+
assert result == test_suggest_confounders_expected_results
1320

1421
def test_suggest_mediators(self):
1522
modeler = IdentificationSuggester()
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
import unittest
2+
from unittest.mock import MagicMock
3+
from guidance.models._openai import OpenAI
4+
5+
from pywhyllm.suggesters.tuebingen_model_suggester import TuebingenModelSuggester, Strategy
6+
from pywhyllm.tests.model_suggester.data_providers.tuebingen_model_suggester_data_provider import *
7+
8+
9+
class TestTuebingenModelSuggester(unittest.TestCase):
10+
def test_suggest_description(self):
11+
modeler = TuebingenModelSuggester()
12+
mock_llm = MagicMock(spec=OpenAI)
13+
modeler.llm = mock_llm
14+
15+
mock_llm.__add__ = MagicMock(return_value=mock_llm)
16+
mock_llm.__getitem__ = MagicMock(return_value=test_suggest_description_expected_response)
17+
result = modeler.suggest_description(variable, True)
18+
assert result == test_suggest_description_expected_result
19+
20+
def test_suggest_onesided_relationship(self):
21+
modeler = TuebingenModelSuggester()
22+
mock_llm = MagicMock(spec=OpenAI)
23+
modeler.llm = mock_llm
24+
25+
mock_llm.__add__ = MagicMock(return_value=mock_llm)
26+
# Case 1: the mocked LLM response indicates that variable a causes variable b
27+
mock_llm.__getitem__ = MagicMock(return_value=test_suggest_onesided_relationship_a_cause_b_expected_response)
28+
result = modeler.suggest_onesided_relationship(variable_a, description_a, variable_b, description_b)
29+
assert result == test_suggest_onesided_relationship_a_cause_b_expected_result
30+
31+
# Case 2: the mocked LLM response indicates that variable a does not cause variable b
32+
mock_llm.__getitem__ = MagicMock(return_value=test_suggest_onesided_relationship_a_not_cause_b_expected_response)
33+
result = modeler.suggest_onesided_relationship(variable_a, description_a, variable_b, description_b)
34+
assert result == test_suggest_onesided_relationship_a_not_cause_b_expected_result
35+
36+
def test__build_description_program(self):
37+
modeler = TuebingenModelSuggester()
38+
mock_llm = MagicMock(spec=OpenAI)
39+
modeler.llm = mock_llm
40+
# Test no context, no reference
41+
result = modeler._build_description_program(variable, False, False)
42+
assert result == test__build_description_program_no_context_no_reference_expected_result
43+
# Test no context, with reference
44+
result = modeler._build_description_program(variable, False, True)
45+
assert result == test__build_description_program_no_context_with_reference_expected_result
46+
# Test with context, no reference
47+
result = modeler._build_description_program(variable, True, False)
48+
assert result == test__build_description_program_with_context_no_reference_expected_result
49+
# Test with context, with reference
50+
result = modeler._build_description_program(variable, True, True)
51+
assert result == test__build_description_program_with_context_with_reference_expected_result
52+
53+
def test_suggest_relationship(self):
54+
modeler = TuebingenModelSuggester()
55+
mock_llm = MagicMock(spec=OpenAI)
56+
modeler.llm = mock_llm
57+
58+
mock_llm.__add__ = MagicMock(return_value=mock_llm)
59+
# Case 1: the mocked LLM response indicates that variable a causes variable b
60+
mock_llm.__getitem__ = MagicMock(return_value=test_suggest_relationship_a_cause_b_expected_response)
61+
result = modeler.suggest_relationship(variable_a, variable_b, description_a, description_b, domain,
62+
strategy=Strategy.ToT_Single, ask_reference=True)
63+
assert result == test_suggest_relationship_a_cause_b_expected_result
64+
# Case 2: the mocked LLM response indicates that variable a does not cause variable b
65+
mock_llm.__getitem__ = MagicMock(return_value=test_suggest_relationship_a_not_cause_b_expected_response)
66+
result = modeler.suggest_relationship(variable_a, variable_b, description_a, description_b, domain,
67+
strategy=Strategy.ToT_Single, ask_reference=True)
68+
assert result == test_suggest_relationship_a_not_cause_b_expected_result
69+
70+
def test__build_relationship_program(self):
71+
modeler = TuebingenModelSuggester()
72+
mock_llm = MagicMock(spec=OpenAI)
73+
modeler.llm = mock_llm
74+
75+
result = modeler._build_relationship_program(variable_a, description_a, variable_b, description_b, domain,
76+
use_description=False, ask_reference=False)
77+
assert result == test__build_relationship_program_expected_result

0 commit comments

Comments
 (0)