Skip to content

Commit 8025be0

Browse files
author
=
committed
cleaning up references to L1, L2, Lambda, etc., in favor of more readable parameter names like w_pen and v_pen
1 parent 0302907 commit 8025be0

File tree

9 files changed

+112
-109
lines changed

9 files changed

+112
-109
lines changed

SparseSC/__init__.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,14 @@
77
from SparseSC.fit_ct import ct_v_matrix, ct_weights, ct_score
88

99
# Public API
10-
from SparseSC.cross_validation import score_train_test, score_train_test_sorted_lambdas, \
11-
CV_score
10+
from SparseSC.cross_validation import (
11+
score_train_test,
12+
score_train_test_sorted_v_pens,
13+
CV_score,
14+
)
1215
from SparseSC.tensor import tensor
1316
from SparseSC.weights import weights
14-
from SparseSC.lambda_utils import get_max_lambda, L2_pen_guestimate
17+
from SparseSC.lambda_utils import get_max_v_pen, w_pen_guestimate
1518

1619
# The version as used in the setup.py
1720
__version__ = "0.1.0"

SparseSC/cross_validation.py

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,7 @@ def score_train_test(X,
158158
return v_mat, w_pen, s
159159

160160

161-
def score_train_test_sorted_lambdas(v_pen,
161+
def score_train_test_sorted_v_pens(v_pen,
162162
start=None,
163163
cache=False,
164164
progress=False,
@@ -184,14 +184,14 @@ def score_train_test_sorted_lambdas(v_pen,
184184
if progress > 0 and (i % progress) == 0:
185185
t1 = time.time()
186186
if FoldNumber is None:
187-
print("lambda: %0.4f, value %s of %s, time elapsed: %0.4f sec." %
187+
print("v_pen: %0.4f, value %s of %s, time elapsed: %0.4f sec." %
188188
(Lam, i+1, len(v_pen), t1 - t0, ))
189-
#print("iteration %s of %s time: %0.4f ,lambda: %0.4f, diags: %s" %
189+
#print("iteration %s of %s time: %0.4f ,v_pen: %0.4f, diags: %s" %
190190
# (i+1, len(v_pen), t1 - t0, Lam, np.diag(v_mat),))
191191
else:
192-
print("Fold %s,lambda: %0.4f, value %s of %s, time elapsed: %0.4f sec." %
192+
print("Fold %s,v_pen: %0.4f, value %s of %s, time elapsed: %0.4f sec." %
193193
(FoldNumber, Lam, i+1, len(v_pen), t1 - t0, ))
194-
#print("Fold %s, iteration %s of %s, time: %0.4f ,lambda: %0.4f, diags: %s" %
194+
#print("Fold %s, iteration %s of %s, time: %0.4f ,v_pen: %0.4f, diags: %s" %
195195
# (FoldNumber, i+1, len(v_pen), t1 - t0, Lam, np.diag(v_mat),))
196196
t0 = time.time()
197197

@@ -209,7 +209,7 @@ def CV_score(X,Y,
209209
max_workers=None,
210210
progress=None,
211211
**kwargs):
212-
""" Cross fold validation for 1 or more L1 Penalties, holding the L2 penalty fixed.
212+
""" Cross fold validation for 1 or more v Penalties, holding the w penalty fixed.
213213
"""
214214

215215
# PARAMETER QC
@@ -234,13 +234,13 @@ def CV_score(X,Y,
234234
try:
235235
_v_pen = iter(v_pen)
236236
except TypeError:
237-
# Lambda is a single value
238-
multi_lambda = False
237+
# v_pen is a single value
238+
multi_v_pen = False
239239
__score_train_test__ = score_train_test
240240
else:
241-
# Lambda is an iterable of values
242-
multi_lambda = True
243-
__score_train_test__ = score_train_test_sorted_lambdas
241+
# v_pen is an iterable of values
242+
multi_v_pen = True
243+
__score_train_test__ = score_train_test_sorted_v_pens
244244

245245
if X_treat is not None:
246246

@@ -381,7 +381,7 @@ def CV_score(X,Y,
381381
# extract the score.
382382
_, _, scores = list(zip(* results))
383383

384-
if multi_lambda:
384+
if multi_v_pen:
385385
total_score = [sum(s) for s in zip(*scores)]
386386
else:
387387
total_score = sum(scores)

SparseSC/fit.py

Lines changed: 36 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from sklearn.model_selection import KFold
88

99
# From the Public API
10-
from SparseSC.lambda_utils import get_max_lambda, L2_pen_guestimate
10+
from SparseSC.lambda_utils import get_max_v_pen, w_pen_guestimate
1111
from SparseSC.cross_validation import CV_score
1212
from SparseSC.tensor import tensor
1313
from SparseSC.weights import weights
@@ -23,8 +23,8 @@ def fit(X,Y,
2323
covariate_penalties = None, # Float or an array of floats
2424
# PARAMETERS USED TO CONSTRUCT DEFAULT GRID COVARIATE_PENALTIES
2525
grid = None, # USER SUPPLIED GRID OF COVARIATE PENALTIES
26-
Lambda_min = 1e-6,
27-
Lambda_max = 1,
26+
min_v_pen = 1e-6,
27+
max_v_pen = 1,
2828
grid_points = 20,
2929
choice = "min",
3030
cv_folds = 10,
@@ -54,36 +54,36 @@ def fit(X,Y,
5454
5555
:param weight_penalty: Penalty applied to the difference
5656
between the current weights and the null weights (1/n). default
57-
provided by :func:``L2_pen_guestimate``.
57+
provided by :func:``w_pen_guestimate``.
5858
:type weight_penalty: float, Optional
5959
6060
:param covariate_penalties: penalty
6161
(penalties) applied to the magnitude of the covariate weights.
6262
Defaults to ``[ Lambda_c_max * g for g in grid]``, where
63-
`Lambda_c_max` is determined via :func:`get_max_lambda` .
63+
`Lambda_c_max` is determined via :func:`get_max_v_pen` .
6464
:type covariate_penalties: float | float[], optional
6565
6666
:param grid: only used when `covariate_penalties` is not provided.
67-
Defaults to ``np.exp(np.linspace(np.log(Lambda_min),np.log(Lambda_max),grid_points))``
67+
Defaults to ``np.exp(np.linspace(np.log(min_v_pen),np.log(max_v_pen),grid_points))``
6868
:type grid: float | float[], optional
6969
70-
:param Lambda_min: Lower bound for ``grid`` when
70+
:param min_v_pen: Lower bound for ``grid`` when
7171
``covariate_penalties`` and ``grid`` are not provided. Must be in the
7272
range ``(0,1)``
73-
:type Lambda_min: float, default = 1e-6
73+
:type min_v_pen: float, default = 1e-6
7474
75-
:param Lambda_max: Upper bound for ``grid`` when
75+
:param max_v_pen: Upper bound for ``grid`` when
7676
``covariate_penalties`` and ``grid`` are not provided. Must be in the
7777
range ``(0,1]``
78-
:type Lambda_max: float, default = 1
78+
:type max_v_pen: float, default = 1
7979
8080
:param grid_points: number of points in the ``grid`` parameter when
8181
``covariate_penalties`` and ``grid`` are not provided
8282
:type grid_points: int, default = 20
8383
8484
:param choice: Method for choosing from among the
8585
covariate_penalties. Only used when covariate_penalties is an
86-
iterable. Defaults to ``"min"`` which selects the lambda parameter
86+
iterable. Defaults to ``"min"`` which selects the v_pen parameter
8787
associated with the lowest cross validation error.
8888
:type choice: str or function. default = ``"min"``
8989
@@ -193,14 +193,14 @@ def fit(X,Y,
193193
# --------------------------------------------------
194194
# (sensible?) defaults
195195
# --------------------------------------------------
196-
# Get the L2 penalty guestimate: very quick ( milliseconds )
196+
# Get the weight penalty guestimate: very quick ( milliseconds )
197197
if weight_penalty is None:
198-
weight_penalty = L2_pen_guestimate(Xtrain)
198+
weight_penalty = w_pen_guestimate(Xtrain)
199199
if covariate_penalties is None:
200200
if grid is None:
201-
grid = np.exp(np.linspace(np.log(Lambda_min),np.log(Lambda_max),grid_points))
201+
grid = np.exp(np.linspace(np.log(min_v_pen),np.log(max_v_pen),grid_points))
202202
# GET THE MAXIMUM v_penS: quick ~ ( seconds to tens of seconds )
203-
v_pen_max = get_max_lambda(Xtrain,
203+
v_pen_max = get_max_v_pen(Xtrain,
204204
Ytrain,
205205
w_pen = weight_penalty,
206206
grad_splits = gradient_folds,
@@ -212,7 +212,7 @@ def fit(X,Y,
212212
# Retrospective Treatment Effects: ( *model_type = "prospective"*)
213213

214214
# --------------------------------------------------
215-
# Phase 1: extract cross fold residual errors for each lambda
215+
# Phase 1: extract cross fold residual errors for each v_pen
216216
# --------------------------------------------------
217217

218218
# SCORES FOR EACH VALUE OF THE GRID: very slow ( minutes to hours )
@@ -228,15 +228,15 @@ def fit(X,Y,
228228
**kwargs)
229229

230230
# GET THE INDEX OF THE BEST SCORE
231-
best_V_lambda = __choose(scores, covariate_penalties, choice)
231+
best_v_pen = __choose(scores, covariate_penalties, choice)
232232

233233
# --------------------------------------------------
234234
# Phase 2: extract V and weights: slow ( tens of seconds to minutes )
235235
# --------------------------------------------------
236236

237237
best_V = tensor(X = Xtrain,
238238
Y = Ytrain,
239-
v_pen = best_V_lambda,
239+
v_pen = best_v_pen,
240240
grad_splits = gradient_folds,
241241
random_state = gradient_seed, # TODO: Cleanup Task 1
242242
**kwargs)
@@ -270,7 +270,7 @@ def fit(X,Y,
270270
gradient_folds.append([control_units, treated_units])
271271

272272
# --------------------------------------------------
273-
# Phase 1: extract cross fold residual errors for each lambda
273+
# Phase 1: extract cross fold residual errors for each v_pen
274274
# --------------------------------------------------
275275

276276
# SCORES FOR EACH VALUE OF THE GRID: very slow ( minutes to hours )
@@ -286,15 +286,15 @@ def fit(X,Y,
286286
**kwargs)
287287

288288
# GET THE INDEX OF THE BEST SCORE
289-
best_V_lambda = __choose(scores, covariate_penalties, choice)
289+
best_v_pen = __choose(scores, covariate_penalties, choice)
290290

291291
# --------------------------------------------------
292292
# Phase 2: extract V and weights: slow ( tens of seconds to minutes )
293293
# --------------------------------------------------
294294

295295
best_V = tensor(X = X,
296296
Y = Y,
297-
v_pen = best_V_lambda,
297+
v_pen = best_v_pen,
298298
grad_splits = gradient_folds,
299299
random_state = gradient_seed, # TODO: Cleanup Task 1
300300
**kwargs)
@@ -306,7 +306,7 @@ def fit(X,Y,
306306
# unobserved ( || Y_treat - W Y_ctrl || ) in counter factual
307307

308308
# --------------------------------------------------
309-
# Phase 1: extract cross fold residual errors for each lambda
309+
# Phase 1: extract cross fold residual errors for each v_pen
310310
# --------------------------------------------------
311311

312312
# SCORES FOR EACH VALUE OF THE GRID: very slow ( minutes to hours )
@@ -322,7 +322,7 @@ def fit(X,Y,
322322
**kwargs)
323323

324324
# GET THE INDEX OF THE BEST SCORE
325-
best_V_lambda = __choose(scores, covariate_penalties, choice)
325+
best_v_pen = __choose(scores, covariate_penalties, choice)
326326

327327
# --------------------------------------------------
328328
# Phase 2: extract V and weights: slow ( tens of seconds to minutes )
@@ -332,7 +332,7 @@ def fit(X,Y,
332332
Y = Ytrain,
333333
X_treat = Xtest,
334334
Y_treat = Ytest,
335-
v_pen = best_V_lambda,
335+
v_pen = best_v_pen,
336336
**kwargs)
337337

338338

@@ -368,21 +368,21 @@ def fit(X,Y,
368368
# --------------------------------------------------
369369
if covariate_penalties is None:
370370
if grid is None:
371-
grid = np.exp(np.linspace(np.log(Lambda_min),np.log(Lambda_max),grid_points))
371+
grid = np.exp(np.linspace(np.log(min_v_pen),np.log(max_v_pen),grid_points))
372372
# GET THE MAXIMUM v_penS: quick ~ ( seconds to tens of seconds )
373-
v_pen_max = get_max_lambda(X,
373+
v_pen_max = get_max_v_pen(X,
374374
Y,
375375
w_pen = weight_penalty,
376376
grad_splits = gradient_folds,
377377
verbose=verbose)
378378
covariate_penalties = grid * v_pen_max
379379

380-
# Get the L2 penalty guestimate: very quick ( milliseconds )
380+
# Get the weight penalty guestimate: very quick ( milliseconds )
381381
if weight_penalty is None:
382-
weight_penalty = L2_pen_guestimate(X)
382+
weight_penalty = w_pen_guestimate(X)
383383

384384
# --------------------------------------------------
385-
# Phase 1: extract cross fold residual errors for each lambda
385+
# Phase 1: extract cross fold residual errors for each v_pen
386386
# --------------------------------------------------
387387

388388
# SCORES FOR EACH VALUE OF THE GRID: very slow ( minutes to hours )
@@ -398,15 +398,15 @@ def fit(X,Y,
398398
**kwargs)
399399

400400
# GET THE INDEX OF THE BEST SCORE
401-
best_V_lambda = __choose(scores, covariate_penalties, choice)
401+
best_v_pen = __choose(scores, covariate_penalties, choice)
402402

403403
# --------------------------------------------------
404404
# Phase 2: extract V and weights: slow ( tens of seconds to minutes )
405405
# --------------------------------------------------
406406

407407
best_V = tensor(X = X,
408408
Y = Y,
409-
v_pen = best_V_lambda,
409+
v_pen = best_v_pen,
410410
grad_splits = gradient_folds,
411411
random_state = gradient_seed, # TODO: Cleanup Task 1
412412
**kwargs)
@@ -423,7 +423,7 @@ def fit(X,Y,
423423
treated_units,
424424
model_type,
425425
# fitting parameters
426-
best_V_lambda,
426+
best_v_pen,
427427
weight_penalty,
428428
covariate_penalties,
429429
best_V,
@@ -437,18 +437,18 @@ def __choose(scores, covariate_penalties, choice):
437437
try:
438438
iter(covariate_penalties)
439439
except TypeError:
440-
best_lambda = scores
440+
best_v_pen = scores
441441
else:
442442
if choice == "min":
443443
best_i = np.argmin(scores)
444-
best_lambda = (covariate_penalties)[best_i]
444+
best_v_pen = (covariate_penalties)[best_i]
445445
elif callable(choice):
446-
best_lambda = choice(scores)
446+
best_v_pen = choice(scores)
447447
else:
448448
# TODO: this is a terrible place to throw this error
449449
raise ValueError("Unexpected value for choice parameter: %s" % choice)
450450

451-
return best_lambda
451+
return best_v_pen
452452

453453

454454
class SparseSCFit(object):

SparseSC/fit_ct.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ def ct_v_matrix(X,
1515
start = None,
1616
w_pen = None,
1717
method = cdl_search,
18-
max_lambda = False, # this is terrible at least without documentation...
18+
return_max_v_pen = False, # this is terrible at least without documentation...
1919
verbose = False,
2020
gradient_message = "Calculating gradient",
2121
**kwargs):
@@ -41,18 +41,18 @@ def ct_v_matrix(X,
4141
:param start: initial values for the diagonals of the tensor matrix
4242
:type start: float[] or numpy.ndarray
4343
44-
:param w_pen: L2 penalty on the magnitude of the deviance of the weight
44+
:param w_pen: weight penalty on the magnitude of the deviance of the weight
4545
vector from null. Optional.
4646
:type w_pen: float
4747
4848
:param method: The name of a method to be used by scipy.optimize.minimize,
4949
or a callable with the same API as scipy.optimize.minimize
5050
:type method: str or callable
5151
52-
:param max_lambda: (Internal API) If ``True``, the return value is the maximum L1 penalty for
52+
:param return_max_v_pen: (Internal API) If ``True``, the return value is the maximum L1 penalty for
5353
which at least one element of the tensor matrix is
5454
non-zero.
55-
:type max_lambda: boolean
55+
:type return_max_v_pen: boolean
5656
5757
:param verbose: If true, print progress to the console (default: false)
5858
:type verbose: boolean
@@ -168,7 +168,7 @@ def _weights(V):
168168
raise exc
169169
return weights, A, B,b
170170

171-
if max_lambda:
171+
if return_max_v_pen:
172172
grad0 = _grad(zeros(K))
173173
return -grad0[grad0 < 0].min()
174174

0 commit comments

Comments
 (0)