6 changes: 5 additions & 1 deletion seldonian/candidate_selection/candidate_selection.py
@@ -18,6 +18,7 @@ def __init__(
        n_safety,
        parse_trees,
        primary_objective,
        mode,
        optimization_technique="barrier_function",
        optimizer="Powell",
        initial_solution=None,
@@ -68,6 +69,7 @@ def __init__(

"""
self.regime = regime
self.mode = mode
self.model = model
self.candidate_dataset = candidate_dataset
self.n_safety = n_safety
@@ -389,8 +391,9 @@ def objective_with_barrier(self, theta):
branch="candidate_selection",
n_safety=self.n_safety,
regime=self.regime,
mode=self.mode,
use_candidate_prior=False
)

pt.propagate_bounds(**bounds_kwargs)

            # Check if the i-th behavioral constraint is satisfied
@@ -479,6 +482,7 @@ def get_constraint_upper_bounds(self, theta):
branch="candidate_selection",
n_safety=self.n_safety,
regime=self.regime,
mode=self.mode
)

pt.propagate_bounds(**bounds_kwargs)
77 changes: 77 additions & 0 deletions seldonian/parse_tree/mcmc/likelihood.py
@@ -0,0 +1,77 @@
import autograd.numpy as np

###### Likelihood Ratio Functions
def Mean_Squared_Error_Likelihood_Ratio(proposal, original, zhat, std):
    if std == 0:
        std = 1e-15
    return np.exp(-((zhat - proposal) ** 2 - (zhat - original) ** 2).sum() / (2 * std ** 2))

##### Likelihood Ratio Functions when also inferring the std
def Mean_Squared_Error_Likelihood_Ratio_Infer_Std(proposal, original, zhat):
    """
    :param proposal:
        Proposal parameters, [mean, std]
    :type proposal: List(float)

    :param original:
        Current parameters, [mean, std]
    :type original: List(float)
    """
    mean_prop, std_prop = proposal
    mean_orig, std_orig = original

    likelihood_ratio = np.exp(
        -(((zhat - mean_prop) / std_prop) ** 2 - ((zhat - mean_orig) / std_orig) ** 2).sum() / 2
        + np.log(std_orig / std_prop) * len(zhat)
    )
    return likelihood_ratio

################################
def get_likelihood_ratio(statistic_name, zhat, datasize, infer_std):
    """
    Get a likelihood ratio function from the
    statistic name and zhat.

    :param datasize:
        Size of the safety data set.
    :type datasize: int

    :param infer_std:
        Whether to infer the std.
    :type infer_std: bool

    :ivar power:
        Ratio of the size of the safety data set
        to the size of zhat.
    :vartype power: float
    """
    power = datasize / len(zhat)

    if statistic_name == "Mean_Squared_Error":
        if infer_std:
            def wrapped_likelihood_ratio(proposal, original):
                return Mean_Squared_Error_Likelihood_Ratio_Infer_Std(proposal, original, zhat)

            return wrapped_likelihood_ratio
        else:
            # Use the std of zhat as the std of the normal assumption.
            # Scale the std of zhat to predict what the std would be for
            # the safety data (replace the denominator of the variance
            # with the size of the safety set instead of the size of
            # the candidate set).
            std = np.std(zhat) * np.sqrt(len(zhat) / datasize)

            # Wrap the likelihood ratio function so that it only takes
            # the proposal g and the original (current) g as inputs.
            def wrapped_likelihood_ratio(proposal, original):
                # In candidate selection, scale the likelihood ratio
                # according to the size of the safety data set.
                # In the safety test, power is 1.
                return Mean_Squared_Error_Likelihood_Ratio(proposal, original, zhat, std) ** power

            return wrapped_likelihood_ratio

    raise NotImplementedError(f"Statistic {statistic_name} is not supported")
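As a quick sanity check (not part of the diff), both ratios should agree with explicit normal density ratios. A minimal sketch with made-up data, assuming scipy is available and that the file above is importable as seldonian.parse_tree.mcmc.likelihood:

import numpy as np
from scipy.stats import norm

from seldonian.parse_tree.mcmc.likelihood import (
    Mean_Squared_Error_Likelihood_Ratio,
    Mean_Squared_Error_Likelihood_Ratio_Infer_Std,
)

rng = np.random.default_rng(0)
zhat = rng.normal(loc=0.3, scale=1.0, size=50)  # invented per-point statistics
std = np.std(zhat)

# fixed-std ratio vs. the ratio of explicit normal likelihoods
ratio = Mean_Squared_Error_Likelihood_Ratio(0.25, 0.35, zhat, std)
direct = np.exp(norm.logpdf(zhat, 0.25, std).sum() - norm.logpdf(zhat, 0.35, std).sum())
assert np.isclose(ratio, direct)

# infer-std ratio vs. the same check with per-model stds
ratio = Mean_Squared_Error_Likelihood_Ratio_Infer_Std([0.25, 0.9], [0.35, 1.1], zhat)
direct = np.exp(norm.logpdf(zhat, 0.25, 0.9).sum() - norm.logpdf(zhat, 0.35, 1.1).sum())
assert np.isclose(ratio, direct)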

134 changes: 134 additions & 0 deletions seldonian/parse_tree/mcmc/mcmc.py
@@ -0,0 +1,134 @@
import numpy as np
from .proposal import ProposalDistribution
from .prior import PriorDistribution
from .likelihood import get_likelihood_ratio

class MetropolisHastings:

    def __init__(self, proposal_width, prior_type, prior_mean, prior_width, likelihood_ratio, infer_std):
        self.proposal_dist = ProposalDistribution(proposal_width, infer_std)
        self.prior_dist = PriorDistribution(prior_type, prior_mean, prior_width, infer_std)
        self.likelihood_ratio = likelihood_ratio
        self.infer_std = infer_std

    def run(self, N=200000, skip_interval=20, burn_in=5000, verbose=False):
        if verbose:
            print("Running MCMC:")

        if self.infer_std:
            # chain state is [mean, std]; start with N(0, 5)
            g = np.array([0, 5])
        else:
            g = 0  # initial g

        samples = []  # thinned samples kept after burn-in
        all_gs = [g]  # keep track of every g in the chain

        t = 0
        t_skip = 0
        acceptance_count = 0

        while True:
            g_proposal = self.proposal_dist.propose(g)

            prior_ratio = self.prior_dist.prior_ratio(g_proposal, g)
            likelihood_ratio = self.likelihood_ratio(g_proposal, g)
            accept_ratio = prior_ratio * likelihood_ratio

            if np.random.uniform() <= accept_ratio:
                g = g_proposal
                acceptance_count += 1

            if t_skip == 0 and t >= burn_in:
                samples.append(g)

            all_gs.append(g)
            t_skip = (t_skip + 1) % skip_interval
            t += 1

            if verbose and t % 10000 == 0:
                print(f"{t} Steps Completed")

            if t == N:
                break

        if verbose:
            print("Acceptance rate:", acceptance_count / N)

        if self.infer_std:
            # keep only the mean component of each sample
            samples = np.array(samples)[:, 0]

        return samples, all_gs


def run_mcmc_default(statistic_name, zhat, datasize, **kwargs):
    """
    Default MCMC settings using a Jeffreys prior.

    :ivar infer_std:
        If True, infer the std with MCMC.
        Otherwise, use the std derived from
        the candidate data.
    :vartype infer_std: bool
    """
    # --TODO-- automatically tune the proposal width
    # so that the acceptance rate is between 0.2 and 0.8.

    infer_std = True
    proposal_width = 0.2

    if kwargs["branch"] == "candidate_selection":
        prior_type = "jeffrey"
        prior_width = None
        prior_mean = None

    elif kwargs["branch"] == "safety_test":
        # use the candidate zhat mean to construct a prior for the safety test
        if kwargs["use_candidate_prior"]:
            prior_type = "normal"
            prior_mean = kwargs["zhat_mean"]
            prior_width = 0.1
        else:
            prior_type = "jeffrey"
            prior_width = None
            prior_mean = None

    likelihood_ratio = get_likelihood_ratio(statistic_name, zhat, datasize, infer_std)

    mh = MetropolisHastings(proposal_width, prior_type, prior_mean, prior_width, likelihood_ratio, infer_std)
    samples, _ = mh.run(N=100000, skip_interval=10, burn_in=3000)

    return samples
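A hedged usage sketch of the entry point above; the data, sizes, and delta are invented, and the import path assumes the package layout introduced in this PR:

import numpy as np
from seldonian.parse_tree.mcmc.mcmc import run_mcmc_default

rng = np.random.default_rng(0)
zhat = rng.normal(loc=0.1, scale=1.0, size=200)  # invented candidate statistics

# Jeffreys prior; datasize is the size of the safety data set
samples = run_mcmc_default(
    "Mean_Squared_Error", zhat, datasize=1000, branch="candidate_selection"
)

# one-sided (1 - delta) credible upper bound, matching the new
# "quantile" bound method added to nodes.py below
upper = np.quantile(samples, 1 - 0.05, method="inverted_cdf")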
24 changes: 24 additions & 0 deletions seldonian/parse_tree/mcmc/prior.py
@@ -0,0 +1,24 @@
import numpy as np

class PriorDistribution:

    def __init__(self, prior_type, mean, width, infer_std):
        self.type = prior_type
        self.mean = mean
        self.width = width
        self.infer_std = infer_std

    def prior_ratio(self, proposal, original):
        if self.type == "jeffrey":
            if self.infer_std:
                # prior(mu, sigma) = 1 / sigma ** 2
                return (original[1] / proposal[1]) ** 2
            else:
                return 1
        elif self.type == "normal":
            if self.infer_std:
                raise NotImplementedError()
            else:
                return np.exp(-((proposal - self.mean) ** 2 - (original - self.mean) ** 2) / (2 * self.width ** 2))
        else:
            raise NotImplementedError(f"Prior type {self.type} is not supported")
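For intuition (not part of the diff): with infer_std, the Jeffreys ratio depends only on the two stds, since p(mu, sigma) ∝ 1 / sigma ** 2 is flat in the mean. A small check, assuming the import path in this PR:

from seldonian.parse_tree.mcmc.prior import PriorDistribution

prior = PriorDistribution("jeffrey", mean=None, width=None, infer_std=True)

# proposal and original are [mean, std] pairs; the means are ignored
ratio = prior.prior_ratio([0.0, 2.0], [5.0, 1.0])
assert ratio == (1.0 / 2.0) ** 2  # (std_orig / std_prop) ** 2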
20 changes: 20 additions & 0 deletions seldonian/parse_tree/mcmc/proposal.py
@@ -0,0 +1,20 @@
import numpy as np

class ProposalDistribution:
    """
    Normal proposal distribution
    """

    def __init__(self, proposal_width, infer_std):
        self.proposal_width = proposal_width
        self.infer_std = infer_std

    def propose(self, x):
        # supports multidimensional x
        proposal = np.random.normal(loc=x, scale=self.proposal_width)

        if self.infer_std:
            # make sure the proposed std stays positive
            if proposal[1] <= 0:
                proposal[1] = 1e-10

        return proposal
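A quick sketch of the proposal in action (import path assumed):

import numpy as np
from seldonian.parse_tree.mcmc.proposal import ProposalDistribution

np.random.seed(0)
proposal_dist = ProposalDistribution(proposal_width=0.2, infer_std=True)

# jitters both components of [mean, std]; a non-positive std is clamped to 1e-10
g_next = proposal_dist.propose(np.array([0.0, 1.0]))

One design note: clamping the std rather than re-drawing makes the proposal slightly asymmetric near std = 0, which the acceptance ratio in mcmc.py does not correct for.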
36 changes: 33 additions & 3 deletions seldonian/parse_tree/nodes.py
@@ -5,7 +5,8 @@

from seldonian.models.objectives import sample_from_statistic, evaluate_statistic
from seldonian.utils.stats_utils import *

from .mcmc.mcmc import run_mcmc_default
import copy

class Node(object):
    def __init__(self, name, lower, upper):
@@ -273,17 +274,32 @@ def calculate_bounds(self, **kwargs):
return {"lower": lower, "upper": upper}

        else:
            # Real confidence bound / credible bound

            # --TODO-- abstract away to support things like
            # getting confidence intervals from the bootstrap
            # and RL cases
            estimator_samples = self.zhat(**kwargs)

            if kwargs["mode"] == "bayesian":
                if kwargs["use_candidate_prior"]:
                    # Use the candidate data set to get a prior for MCMC
                    candidate_kwargs = copy.deepcopy(kwargs)
                    candidate_kwargs["dataset"] = kwargs["candidate_dataset"]
                    candidate_kwargs["branch"] = "candidate_selection"
                    candidate_kwargs["n_safety"] = kwargs["datasize"]
                    candidate_data_dict, _ = self.calculate_data_forbound(**candidate_kwargs)
                    candidate_kwargs["data_dict"] = candidate_data_dict
                    candidate_kwargs["datasize"] = None  # not used by evaluate_statistic

                    value = self.calculate_value(**candidate_kwargs)
                    kwargs["zhat_mean"] = value

                posterior_samples = run_mcmc_default(
                    self.measure_function_name, estimator_samples, **kwargs
                )

            branch = kwargs["branch"]
            data_dict = kwargs["data_dict"]
            bound_kwargs = kwargs
            bound_kwargs["data"] = (
                estimator_samples if kwargs["mode"] == "frequentist" else posterior_samples
            )
            bound_kwargs["bound_method"] = (
                "ttest" if kwargs["mode"] == "frequentist" else "quantile"
            )
            bound_kwargs["delta"] = self.delta

            # If lower and upper are both needed,
@@ -371,6 +387,8 @@ def predict_HC_lowerbound(self, data, datasize, delta, **kwargs):
                lower = data.mean() - 2 * stddev(data) / np.sqrt(datasize) * tinv(
                    1.0 - delta, datasize - 1
                )
            elif bound_method == "quantile":
                lower = np.quantile(data, delta, method="inverted_cdf")
            else:
                raise NotImplementedError(
                    f"Bounding method {bound_method} is not supported"
@@ -401,6 +419,8 @@ def predict_HC_upperbound(self, data, datasize, delta, **kwargs):
                lower = data.mean() + 2 * stddev(data) / np.sqrt(datasize) * tinv(
                    1.0 - delta, datasize - 1
                )
            elif bound_method == "quantile":
                lower = np.quantile(data, 1 - delta, method="inverted_cdf")
            else:
                raise NotImplementedError(
                    f"Bounding method {bound_method} is not supported"
@@ -442,6 +462,9 @@ def predict_HC_upper_and_lowerbound(self, data, datasize, delta, **kwargs):

elif bound_method == "manual":
pass
elif bound_method == "quantile":
lower = np.quantile(data, delta / 2, method="inverted_cdf")
upper = np.quantile(data, 1 - delta / 2, method="inverted_cdf")
else:
raise NotImplementedError(
f"Bounding method {bound_method}" " is not supported"
@@ -471,6 +494,8 @@ def compute_HC_lowerbound(self, data, datasize, delta, **kwargs):
                lower = data.mean() - stddev(data) / np.sqrt(datasize) * tinv(
                    1.0 - delta, datasize - 1
                )
            elif bound_method == "quantile":
                lower = np.quantile(data, delta, method="inverted_cdf")
            else:
                raise NotImplementedError(
                    f"Bounding method {bound_method} is not supported"
@@ -499,6 +524,8 @@ def compute_HC_upperbound(self, data, datasize, delta, **kwargs):
                upper = data.mean() + stddev(data) / np.sqrt(datasize) * tinv(
                    1.0 - delta, datasize - 1
                )
            elif bound_method == "quantile":
                upper = np.quantile(data, 1 - delta, method="inverted_cdf")
            else:
                raise NotImplementedError(
                    f"Bounding method {bound_method} is not supported"
@@ -539,6 +566,9 @@ def compute_HC_upper_and_lowerbound(self, data, datasize, delta, **kwargs):

elif bound_method == "manual":
pass
elif bound_method == "quantile":
lower = np.quantile(data, delta / 2, method="inverted_cdf")
upper = np.quantile(data, 1 - delta / 2, method="inverted_cdf")
else:
raise NotImplementedError(
f"Bounding method {bound_method}" " is not supported"
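To illustrate the new branch, a hedged side-by-side of the frequentist ttest bound and the Bayesian quantile bound; all data here are invented, and scipy's t.ppf stands in for tinv from stats_utils:

import numpy as np
from scipy.stats import t

delta = 0.05
rng = np.random.default_rng(0)

# frequentist: zhat values plus the safety-set size (as in compute_HC_upperbound)
zhat = rng.normal(0.0, 1.0, size=500)
datasize = 1000
ttest_upper = zhat.mean() + zhat.std(ddof=1) / np.sqrt(datasize) * t.ppf(
    1 - delta, datasize - 1
)

# bayesian: posterior samples of g from MCMC play the role of `data`
posterior_samples = rng.normal(0.0, 0.05, size=5000)  # invented posterior
quantile_upper = np.quantile(posterior_samples, 1 - delta, method="inverted_cdf")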