Skip to content

Commit dc363a8

Browse files
committed
Add pending changes
1 parent 0307576 commit dc363a8

File tree

3 files changed

+43
-23
lines changed

3 files changed

+43
-23
lines changed

machine_learning_hep/analysis/analyzer_jets.py

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -838,6 +838,7 @@ def _analyze(self, method = 'sidesub'):
838838
self.logger.debug("Final histogram: %s, jet pT %g to %g",
839839
var, range_ptjet[0], range_ptjet[1])
840840
# self.logger.debug(print_histogram(hproj_sel))
841+
ROOT.gStyle.SetOptStat(0)
841842
self._save_hist(
842843
hproj_sel,
843844
f'uf/h_{var}_{method}_unfolded_{mcordata}_' +
@@ -966,11 +967,16 @@ def estimate_feeddown(self):
966967
h_effkine_gen = self._build_effkine(
967968
rfile.Get(f'h_effkine_fd_gen_nocuts_{var}'),
968969
rfile.Get(f'h_effkine_fd_gen_cut_{var}'))
970+
self._save_hist(h_effkine_gen, f'fd/h_effkine-ptjet-{var}_fd_gen.png', 'text')
969971
h_effkine_det = self._build_effkine(
970972
rfile.Get(f'h_effkine_fd_det_nocuts_{var}'),
971973
rfile.Get(f'h_effkine_fd_det_cut_{var}'))
974+
self._save_hist(h_effkine_det, f'fd/h_effkine-ptjet-{var}_fd_det.png', 'text')
972975
h_response = rfile.Get(f'h_response_fd_{var}')
973-
h_response.Print('v')
976+
self._save_hist(project_hist(h_response, [0, 3], {}), f'fd/h_response_ptjet_{var}.png')
977+
self._save_hist(project_hist(h_response, [1, 4], {}), f'fd/h_response_pthf_{var}.png')
978+
self._save_hist(project_hist(h_response, [2, 5], {}), f'fd/h_response_shape_{var}.png')
979+
h_response.Print('all')
974980
print(f'fd folding for {var=}')
975981
h_response_norm = norm_response(h_response, 3)
976982
h3_fd_gen.Multiply(h_effkine_gen)

machine_learning_hep/processer_jet.py

Lines changed: 29 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -131,11 +131,17 @@ def _calculate_variables(self, df, verify=False): # pylint: disable=invalid-name
131131
df['hfPx'] = df.fPt * np.cos(df.fPhi)
132132
df['hfPy'] = df.fPt * np.sin(df.fPhi)
133133
df['hfPz'] = df.fPt * np.sinh(df.fEta)
134+
df['piPx'] = df.jetPx - df.hfPx
135+
df['piPy'] = df.jetPy - df.hfPy
136+
df['piPz'] = df.jetPz - df.hfPz
134137
df['zpar_num'] = df.jetPx * df.hfPx + df.jetPy * df.hfPy + df.jetPz * df.hfPz
135138
df['zpar_den'] = df.jetPx * df.jetPx + df.jetPy * df.jetPy + df.jetPz * df.jetPz
136139
df['zpar'] = df.zpar_num / df.zpar_den
137-
df[df['zpar'] == 1.]['zpar'] = .99999 # move 1 to last bin
140+
df[df['zpar'] >= 1.]['zpar'] = .99999 # move 1 to last bin
138141
df['nsub21'] = df.fNSub2 / df.fNSub1
142+
df['E_D'] = np.sqrt(1.86**2 + df.hfPx**2 + df.hfPy**2 + df.hfPz**2)
143+
df['E_pi'] = np.sqrt(.139**2 + df.piPx**2 + df.piPy**2 + df.piPz**2)
144+
df['M_D_pi'] = np.sqrt((df.E_D + df.E_pi)**2 - df.jetPx**2 - df.jetPy**2 - df.jetPz**2)
139145

140146
self.logger.debug('zg')
141147
df['zg_array'] = np.array(.5 - abs(df.fPtSubLeading / (df.fPtLeading + df.fPtSubLeading) - .5))
@@ -223,6 +229,8 @@ def process_histomass_single(self, index):
223229
dfquery(df, 'idx_match >= 0', inplace=True)
224230

225231
self._calculate_variables(df)
232+
# FIXME: suppress D*, move to DB
233+
df = df[(abs(df.M_D_pi - 2.01) > .01) & (df.fJetNConstituents == 2)]
226234

227235
for obs, spec in self.cfg('observables', {}).items():
228236
self.logger.debug('preparing histograms for %s', obs)
@@ -272,7 +280,8 @@ def process_efficiency_single(self, index):
272280
h_response_fd = {var:
273281
create_hist(
274282
f'h_response_fd_{var}',
275-
f";p_{{T}}^{{jet}} (GeV/#it{{c}});{var};p_{{T}}^{{jet}} (GeV/#it{{c}});{var};p_{{T}} (GeV/#it{{c}})",
283+
f";p_{{T}}^{{jet}} (GeV/#it{{c}});p_{{T}}^{{HF}} (GeV/#it{{c}});{var};" +
284+
f"p_{{T}}^{{jet}} (GeV/#it{{c}});p_{{T}}^{{HF}} (GeV/#it{{c}});{var}",
276285
self.binarrays_ptjet['det'][var], self.binarrays_obs['det']['fPt'], self.binarrays_obs['det'][var],
277286
self.binarrays_ptjet['gen'][var], self.binarrays_obs['gen']['fPt'], self.binarrays_obs['gen'][var])
278287
for var in self.cfg('observables', []) if not '-' in var}
@@ -371,7 +380,8 @@ def process_efficiency_single(self, index):
371380

372381
if cat in dfmatch and dfmatch[cat] is not None:
373382
self._prepare_response(dfmatch[cat], h_effkine, h_response, cat, var)
374-
self._prepare_response_fd(dfmatch[cat], h_effkine_fd, h_response_fd, var)
383+
if cat == 'np':
384+
self._prepare_response_fd(dfmatch[cat], h_effkine_fd, h_response_fd, var)
375385
f = self.cfg('frac_mcana', .2)
376386
_, df_mccorr = self.split_df(dfmatch[cat], f if f < 1. else 0.)
377387
self._prepare_response(df_mccorr, h_effkine_frac, h_response_frac, cat, var)
@@ -419,24 +429,24 @@ def _prepare_response_fd(self, dfi, h_effkine, h_response, var):
419429
axis_var_gen = get_axis(h_response[var], 5)
420430

421431
df = dfi
432+
fill_hist(h_response[var], df[['fJetPt_gen', 'fPt_gen', f'{var}_gen', 'fJetPt_gen', 'fPt_gen', f'{var}_gen']])
433+
422434
# TODO: the first cut should be taken care of by under-/overflow bins, check their usage in analyzer
423-
df = df.loc[(df.fJetPt >= axis_ptjet_det.GetXmin()) & (df.fJetPt < axis_ptjet_det.GetXmax()) &
424-
(df.fPt >= axis_pthf_det.GetXmin()) & (df.fPt < axis_pthf_det.GetXmax()) &
425-
(df[var] >= axis_var_det.GetXmin()) & (df[var] < axis_var_det.GetXmax())]
435+
# df = df.loc[(df.fJetPt >= axis_ptjet_det.GetXmin()) & (df.fJetPt < axis_ptjet_det.GetXmax()) &
436+
# (df.fPt >= axis_pthf_det.GetXmin()) & (df.fPt < axis_pthf_det.GetXmax()) &
437+
# (df[var] >= axis_var_det.GetXmin()) & (df[var] < axis_var_det.GetXmax())]
426438
fill_hist(h_effkine[('det', 'nocuts', var)], df[['fJetPt', 'fPt', var]])
427-
df = df.loc[(df.fJetPt_gen >= axis_ptjet_gen.GetXmin()) & (df.fJetPt_gen < axis_ptjet_gen.GetXmax()) &
428-
(df.fPt_gen >= axis_pthf_gen.GetXmin()) & (df.fPt_gen < axis_pthf_gen.GetXmax()) &
429-
(df[f'{var}_gen'] >= axis_var_gen.GetXmin()) & (df[f'{var}_gen'] < axis_var_gen.GetXmax())]
439+
# df = df.loc[(df.fJetPt_gen >= axis_ptjet_gen.GetXmin()) & (df.fJetPt_gen < axis_ptjet_gen.GetXmax()) &
440+
# (df.fPt_gen >= axis_pthf_gen.GetXmin()) & (df.fPt_gen < axis_pthf_gen.GetXmax()) &
441+
# (df[f'{var}_gen'] >= axis_var_gen.GetXmin()) & (df[f'{var}_gen'] < axis_var_gen.GetXmax())]
430442
fill_hist(h_effkine[('det', 'cut', var)], df[['fJetPt', 'fPt', var]])
431443

432-
fill_hist(h_response[var], df[['fJetPt_gen', 'fPt_gen', f'{var}_gen', 'fJetPt_gen', 'fPt_gen', f'{var}_gen']])
433-
434444
df = dfi
435-
df = df.loc[(df.fJetPt_gen >= axis_ptjet_gen.GetXmin()) & (df.fJetPt_gen < axis_ptjet_gen.GetXmax()) &
436-
(df.fPt_gen >= axis_pthf_gen.GetXmin()) & (df.fPt_gen < axis_pthf_gen.GetXmax()) &
437-
(df[f'{var}_gen'] >= axis_var_gen.GetXmin()) & (df[f'{var}_gen'] < axis_var_gen.GetXmax())]
438-
fill_hist(h_effkine[('gen', 'nocuts', var)], df[['fJetPt_gen', 'fPt', f'{var}_gen']])
439-
df = df.loc[(df.fJetPt >= axis_ptjet_det.GetXmin()) & (df.fJetPt < axis_ptjet_det.GetXmax()) &
440-
(df.fPt >= axis_pthf_det.GetXmin()) & (df.fPt < axis_pthf_det.GetXmax()) &
441-
(df[f'{var}'] >= axis_var_det.GetXmin()) & (df[f'{var}'] < axis_var_det.GetXmax())]
442-
fill_hist(h_effkine[('gen', 'cut', var)], df[['fJetPt_gen', 'fPt', f'{var}_gen']])
445+
# df = df.loc[(df.fJetPt_gen >= axis_ptjet_gen.GetXmin()) & (df.fJetPt_gen < axis_ptjet_gen.GetXmax()) &
446+
# (df.fPt_gen >= axis_pthf_gen.GetXmin()) & (df.fPt_gen < axis_pthf_gen.GetXmax()) &
447+
# (df[f'{var}_gen'] >= axis_var_gen.GetXmin()) & (df[f'{var}_gen'] < axis_var_gen.GetXmax())]
448+
fill_hist(h_effkine[('gen', 'nocuts', var)], df[['fJetPt_gen', 'fPt_gen', f'{var}_gen']])
449+
# df = df.loc[(df.fJetPt >= axis_ptjet_det.GetXmin()) & (df.fJetPt < axis_ptjet_det.GetXmax()) &
450+
# (df.fPt >= axis_pthf_det.GetXmin()) & (df.fPt < axis_pthf_det.GetXmax()) &
451+
# (df[f'{var}'] >= axis_var_det.GetXmin()) & (df[f'{var}'] < axis_var_det.GetXmax())]
452+
fill_hist(h_effkine[('gen', 'cut', var)], df[['fJetPt_gen', 'fPt_gen', f'{var}_gen']])

machine_learning_hep/utils/hist.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -305,7 +305,7 @@ def norm_response(response, dim_out):
305305
set_bin_val(response_norm, bin_out + bin_in, get_bin_val(response, bin_out + bin_in) / norm)
306306
set_bin_err(response_norm, bin_out + bin_in, get_bin_err(response, bin_out + bin_in) / norm)
307307
total += get_bin_val(response_norm, bin_out + bin_in)
308-
print(f'distributed {bin_in=} to {total=} counts')
308+
# print(f'distributed {bin_in=} to {total=} counts')
309309
return response_norm
310310

311311

@@ -320,13 +320,17 @@ def fold_hist(hist, response):
320320
total = 0.
321321
for bin_out in itertools.product(*(range(1, get_nbins(hfold, i) + 1) for i in range(get_dim(hfold)))):
322322
total += get_bin_val(response, bin_out + bin_in)
323-
print(f'redistributed {bin_in=} to {total=} counts')
323+
# print(f'redistributed {bin_in=} to {total=} counts')
324324

325325
for bin_out in itertools.product(*(range(1, get_nbins(hfold, i) + 1) for i in range(get_dim(hfold)))):
326326
val = 0.
327327
err = 0.
328328
for bin_in in itertools.product(*(range(1, get_nbins(hist, i) + 1) for i in range(get_dim(hist)))):
329-
print(f'{bin_out=} collecting {bin_in=} with weight {get_bin_val(response, bin_out + bin_in)}')
329+
# if bin_in == bin_out:
330+
# val = get_bin_val(hist, bin_in)
331+
# err = get_bin_err(hist, bin_in)**2
332+
# continue
333+
# print(f'{bin_out=} collecting {bin_in=} with weight {get_bin_val(response, bin_out + bin_in)}')
330334
val += get_bin_val(hist, bin_in) * get_bin_val(response, bin_out + bin_in)
331335
err += get_bin_err(hist, bin_in)**2 * get_bin_val(response, bin_out + bin_in)**2
332336
set_bin_val(hfold, bin_out, val)

0 commit comments

Comments
 (0)