From 720367e17e125b4dbba60af7669aa8f907fab88a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?V=C3=ADt=20Ku=C4=8Dera?=
Date: Mon, 2 Sep 2024 14:20:16 +0200
Subject: [PATCH 1/2] Fix logging messages

---
 .../analysis/analyzer_jets.py           | 70 +++++++++----------
 .../analysis/do_systematics.py          |  4 +-
 machine_learning_hep/globalfitter.py    |  6 +-
 machine_learning_hep/multiprocesser.py  |  2 +-
 machine_learning_hep/optimiser.py       |  4 +-
 machine_learning_hep/processer.py       | 22 +++---
 machine_learning_hep/processer_jet.py   | 16 ++---
 machine_learning_hep/steer_analysis.py  |  2 +-
 machine_learning_hep/utilities.py       |  8 +--
 machine_learning_hep/utilities_files.py |  6 +-
 10 files changed, 70 insertions(+), 70 deletions(-)

diff --git a/machine_learning_hep/analysis/analyzer_jets.py b/machine_learning_hep/analysis/analyzer_jets.py
index 699011d7dd..2fac64953b 100644
--- a/machine_learning_hep/analysis/analyzer_jets.py
+++ b/machine_learning_hep/analysis/analyzer_jets.py
@@ -106,7 +106,7 @@ def _save_canvas(self, canvas, filename):

     def _save_hist(self, hist, filename, option = '', logy = False):
         if not hist:
-            self.logger.error('no histogram for <%s>', filename)
+            self.logger.error('No histogram for <%s>', filename)
             # TODO: remove file if it exists?
             return
         c = TCanvas()
@@ -283,7 +283,7 @@ def calculate_efficiencies(self):

     def _correct_efficiency(self, hist, ipt):
         if not hist:
-            self.logger.error('no histogram to correct for efficiency')
+            self.logger.error('No histogram to correct for efficiency')
             return

         if self.cfg('efficiency.correction_method') == 'run3':
@@ -292,9 +292,9 @@ def _correct_efficiency(self, hist, ipt):
             self.logger.info('Using Run 3 efficiency %g instead of %g', eff, eff_old)
             hist.Scale(1. / eff)
         elif self.cfg('efficiency.correction_method') == 'run2_2d':
-            self.logger.info('using Run 2 efficiencies per jet pt bin')
+            self.logger.info('Using Run 2 efficiencies per jet pt bin')
             if not self.h_eff_ptjet_pthf['pr']:
-                self.logger.error('no efficiency available for %s', hist.GetName())
+                self.logger.error('No efficiency available for %s', hist.GetName())
                 return

             for iptjet in range(get_nbins(hist, 0)):
@@ -308,7 +308,7 @@ def _correct_efficiency(self, hist, ipt):
         else:
             self.logger.info('Correcting with Run 2 efficiencies')
             if not self.hcandeff['pr']:
-                self.logger.error('no efficiency available for %s', hist.GetName())
+                self.logger.error('No efficiency available for %s', hist.GetName())
                 return

             eff = self.hcandeff['pr'].GetBinContent(ipt + 1)
@@ -319,7 +319,7 @@ def _correct_efficiency(self, hist, ipt):
                                 hist.GetName(), ipt)
                 return

-        self.logger.debug('scaling hist %s (ipt %i) with 1. / %g', hist.GetName(), ipt, eff)
+        self.logger.debug('Scaling hist %s (ipt %i) with 1. / %g', hist.GetName(), ipt, eff)
         hist.Scale(1. / eff)
@@ -408,7 +408,7 @@ def fit(self):
                     self.logger.critical("Histogram %s not found.", name_histo)
                 for iptjet, ipt in itertools.product(itertools.chain((None,), range(get_nbins(h, 1))),
                                                      range(get_nbins(h, 2))):
-                    self.logger.debug('fitting %s: %s, %i', level, iptjet, ipt)
+                    self.logger.debug('Fitting %s: %s, %i', level, iptjet, ipt)
                     axis_ptjet = get_axis(h, 1)
                     cuts_proj = {2: (ipt+1, ipt+1)}
                     if iptjet is not None:
@@ -460,7 +460,7 @@ def fit(self):
                         continue
                     roows = self.roows.get(ipt) if iptjet is None else self.roows_ptjet.get((iptjet, ipt))
                     if roows is None and level != self.fit_levels[0]:
-                        self.logger.warning('missing previous fit result, skipping %s iptjet %s ipt %d',
+                        self.logger.warning('Missing previous fit result, skipping %s iptjet %s ipt %d',
                                             level, iptjet, ipt)
                         continue
                     for par in fitcfg.get('fix_params', []):
@@ -499,7 +499,7 @@ def fit(self):
                     self.fit_func_bkg[level][ipt] = roo_ws.pdf("bkg").asTF(roo_ws.var(varname_m))
                     self.fit_range[level][ipt] = (roo_ws.var(varname_m).getMin('fit'),
                                                   roo_ws.var(varname_m).getMax('fit'))
-                    self.logger.debug('fit range for %s-%i: %s', level, ipt, self.fit_range[level][ipt])
+                    self.logger.debug('Fit range for %s-%i: %s', level, ipt, self.fit_range[level][ipt])

     #region sidebands
     # pylint: disable=too-many-branches,too-many-statements,too-many-locals
@@ -508,19 +508,19 @@ def _subtract_sideband(self, hist, var, mcordata, ipt):
         Subtract sideband distributions, assuming mass on first axis
         """
         if not hist:
-            self.logger.error('no histogram for %s bin %d', var, ipt)
+            self.logger.error('No histogram for %s bin %d', var, ipt)
             return None
         label = f'-{var}' if var else ''
         ptrange = (self.bins_candpt[ipt], self.bins_candpt[ipt+1])
         self._save_hist(hist, f'sideband/h_mass-ptjet{label}_pthf-{ptrange[0]}-{ptrange[1]}_{mcordata}.png')

         mean = self.fit_mean[mcordata][ipt]
-        # self.logger.info('means %g, %g', mean, self.roows[ipt].var('mean').getVal())
+        # self.logger.info('Means %g, %g', mean, self.roows[ipt].var('mean').getVal())
         sigma = self.fit_sigma[mcordata][ipt]
-        # self.logger.info('sigmas %g, %g', sigma, self.roows[ipt].var('sigma_g1').getVal())
+        # self.logger.info('Sigmas %g, %g', sigma, self.roows[ipt].var('sigma_g1').getVal())
         fit_range = self.fit_range[mcordata][ipt]
         if mean is None or sigma is None or fit_range is None:
-            self.logger.error('no fit parameters for %s bin %s-%d', var or 'none', mcordata, ipt)
+            self.logger.error('No fit parameters for %s bin %s-%d', var or 'none', mcordata, ipt)
             return None

         for entry in self.cfg('sidesub', []):
@@ -538,15 +538,15 @@ def _subtract_sideband(self, hist, var, mcordata, ipt):
                 'sideband_right': (mean + regcfg['right'][0] * sigma, mean + regcfg['right'][1] * sigma)
             }
             if regions['sideband_left'][1] < fit_range[0] or regions['sideband_right'][0] > fit_range[1]:
-                self.logger.critical('sidebands %s for %s-%i not in fit range %s, fix regions in DB!',
+                self.logger.critical('Sidebands %s for %s-%i not in fit range %s, fix regions in DB!',
                                      regions, mcordata, ipt, fit_range)
             for reg, lim in regions.items():
                 if lim[0] < fit_range[0] or lim[1] > fit_range[1]:
                     regions[reg] = (max(lim[0], fit_range[0]), min(lim[1], fit_range[1]))
-                    self.logger.warning('region %s for %s bin %d (%s) extends beyond fit range: %s, clipping to %s',
+                    self.logger.warning('Region %s for %s bin %d (%s) extends beyond fit range: %s, clipping to %s',
                                         reg, mcordata, ipt, ptrange, lim, regions[reg])
                     if regions[reg][1] < regions[reg][0]:
-                        self.logger.error('region limits inverted, reducing to zero width')
+                        self.logger.error('Region limits inverted, reducing to zero width')
                         regions[reg] = (regions[reg][0], regions[reg][0])
             axis = get_axis(hist, 0)
             bins = {key: (axis.FindBin(region[0]), axis.FindBin(region[1]) - 1) for key, region in regions.items()}
@@ -581,7 +581,7 @@ def _subtract_sideband(self, hist, var, mcordata, ipt):
                         self.logger.error('Could not retrieve roows for %s-%i-%i', mcordata, iptjet, ipt)
                         continue
                     area = {region: f.Integral(*limits[region]) for region in regions}
-                    self.logger.info('areas for %s-%s: %g, %g, %g',
+                    self.logger.info('Areas for %s-%s: %g, %g, %g',
                                      mcordata, ipt, area['signal'], area['sideband_left'], area['sideband_right'])
                     if (area['sideband_left'] + area['sideband_right']) > 0.:
                         subtract_sidebands = True
@@ -594,7 +594,7 @@ def _subtract_sideband(self, hist, var, mcordata, ipt):
                         f = self.roo_ws[mcordata][ipt].pdf("bkg").asTF(self.roo_ws[mcordata][ipt].var("m"))
                         area[region] = f.Integral(*limits[region])

-                self.logger.info('areas for %s-%s: %g, %g, %g',
+                self.logger.info('Areas for %s-%s: %g, %g, %g',
                                  mcordata, ipt, area['signal'], area['sideband_left'], area['sideband_right'])

                 if (area['sideband_left'] + area['sideband_right']) > 0.:
@@ -683,7 +683,7 @@ def _subtract_sideband(self, hist, var, mcordata, ipt):
                 frac_sig = pdf_sig.createIntegral(var_m, ROOT.RooFit.NormSet(var_m), ROOT.RooFit.Range('signal')).getVal()
                 if pdf_peak := self.roows[ipt].pdf('peak'):
                     frac_peak = pdf_peak.createIntegral(var_m, ROOT.RooFit.NormSet(var_m), ROOT.RooFit.Range('signal')).getVal()
-                self.logger.info('correcting %s-%i for fractional signal area: %g (Gaussian: %g)',
+                self.logger.info('Correcting %s-%i for fractional signal area: %g (Gaussian: %g)',
                                  mcordata, ipt, frac_sig, frac_peak)
                 fh_subtracted.Scale(1. / frac_sig)

@@ -701,7 +701,7 @@ def _analyze(self, method = 'sidesub'):
             for var in [None] + self.observables['all']:
                 self.logger.info('Running analysis for %s using %s', var, method)
                 label = f'-{var}' if var else ''
-                self.logger.debug('looking for %s', f'h_mass-ptjet-pthf{label}')
+                self.logger.debug('Looking for %s', f'h_mass-ptjet-pthf{label}')
                 if fh := rfile.Get(f'h_mass-ptjet-pthf{label}'): # TODO: add sanity check
                     axes_proj = list(range(get_dim(fh)))
                     axes_proj.remove(2)
@@ -716,23 +716,23 @@ def _analyze(self, method = 'sidesub'):
                     elif method == 'sigextr':
                         h = self._extract_signal(h_in, var, mcordata, ipt)
                     else:
-                        self.logger.critical('invalid method %s', method)
+                        self.logger.critical('Invalid method %s', method)
                     self._save_hist(h, f'h_ptjet{label}_{method}_noeff_{mcordata}_pt{ipt}.png')
                     if mcordata == 'mc':
                         h_proj = project_hist(h_in, axes_proj[1:], {})
                         h_proj_lim = project_hist(h_in, axes_proj[1:], {0: (1, get_nbins(h_in, 0))})
                         self._save_hist(h_proj, f'h_ptjet{label}_proj_noeff_{mcordata}_pt{ipt}.png')
                         if h and h_proj:
-                            self.logger.debug('signal loss %s-%i: %g, fraction in under-/overflow: %g',
+                            self.logger.debug('Signal loss %s-%i: %g, fraction in under-/overflow: %g',
                                               mcordata, ipt, 1. - h.Integral()/h_proj.Integral(),
                                               1. - h_proj_lim.Integral()/h_proj.Integral())
                         if self.cfg('closure.pure_signal'):
-                            self.logger.debug('assuming pure signal, using projection')
+                            self.logger.debug('Assuming pure signal, using projection')
                             h = h_proj

                     # Efficiency correction
                     if mcordata == 'data' or not self.cfg('closure.use_matched'):
-                        self.logger.info('correcting efficiency')
+                        self.logger.info('Correcting efficiency')
                         self._correct_efficiency(h, ipt)
                     fh_sub.append(h)
                 fh_sum = sum_hists(fh_sub)
@@ -839,13 +839,13 @@ def _extract_signal(self, hist, var, mcordata, ipt):
         Extract signal through inv. mass fit (first axis) in bins of other axes
         """
         if not hist:
-            self.logger.warning('no histogram for %s bin %d', var, ipt)
+            self.logger.warning('No histogram for %s bin %d', var, ipt)
             return None
         ptrange = (self.bins_candpt[ipt], self.bins_candpt[ipt+1])
         self._save_hist(hist, f'signalextr/h_mass-{var}_pthf-{ptrange[0]}-{ptrange[1]}_{mcordata}.png')

         if self.fit_mean[mcordata][ipt] is None or self.fit_sigma[mcordata][ipt] is None:
-            self.logger.warning('no fit parameters for %s bin %s-%d', var, mcordata, ipt)
+            self.logger.warning('No fit parameters for %s bin %s-%d', var, mcordata, ipt)
             return None # TODO: should we continue nonetheless?

         axes = list(range(get_dim(hist)))
@@ -887,9 +887,9 @@ def estimate_feeddown(self):
         with TFile(self.cfg('fd_root')) as rfile:
             powheg_xsection = rfile.Get('fHistXsection')
             powheg_xsection_scale_factor = powheg_xsection.GetBinContent(1) / powheg_xsection.GetEntries()
-        self.logger.info('powheg scale factor %g', powheg_xsection_scale_factor)
-        self.logger.info('number of collisions in data: %g', self.n_colls['data'])
-        self.logger.info('number of collisions in MC: %g', self.n_colls['mc'])
+        self.logger.info('POWHEG scale factor %g', powheg_xsection_scale_factor)
+        self.logger.info('Number of collisions in data: %g', self.n_colls['data'])
+        self.logger.info('Number of collisions in MC: %g', self.n_colls['mc'])

         df = pd.read_parquet(self.cfg('fd_parquet'))
         col_mapping = {'dr': 'delta_r_jet', 'zpar': 'z'} # TODO: check mapping
@@ -907,14 +907,14 @@ def estimate_feeddown(self):
             elif binning := self.cfg(f'observables.{var}.bins_fix'):
                 bins_tmp = bin_array(*binning)
             else:
-                self.logger.error('no binning specified for %s, using defaults', var)
+                self.logger.error('No binning specified for %s, using defaults', var)
                 bins_tmp = bin_array(10, 0., 1.)
             bins_obs[var] = bins_tmp

             colname = col_mapping.get(var, f'{var}_jet')
             if f'{colname}' not in df:
                 if var is not None:
-                    self.logger.error('No feeddown information for %s (%s), cannot estimate feeddown', var, colname)
+                    self.logger.error('No feed-down information for %s (%s), cannot estimate feed-down', var, colname)
                 continue

             # TODO: derive histogram
@@ -1000,7 +1000,7 @@ def _build_response_matrix(self, h_response, h_eff = None):
                 np.asarray([hbin[0][0], hbin[1][0], hbin[2][0], hbin[3][0], hbin[4][0]], 'i'))
             eff = h_eff.GetBinContent(hbin[4][0]) if h_eff else 1.
             if np.isclose(eff, 0.):
-                self.logger.error('efficiency 0 for %s', hbin[4])
+                self.logger.error('Efficiency 0 for %s', hbin[4])
                 continue
             for _ in range(int(n)):
                 rm.Fill(hbin[0][1], hbin[1][1], hbin[2][1], hbin[3][1], 1./eff)
@@ -1011,7 +1011,7 @@ def _build_response_matrix(self, h_response, h_eff = None):
     def _subtract_feeddown(self, hist, var, mcordata):
         if var not in self.hfeeddown_det[mcordata]:
             if var is not None:
-                self.logger.error('No feeddown information available for %s, cannot subtract', var)
+                self.logger.error('No feed-down information available for %s, cannot subtract', var)
             return
         if h_fd := self.hfeeddown_det[mcordata][var]:
             if get_dim(hist) == 1:
@@ -1019,7 +1019,7 @@ def _subtract_feeddown(self, hist, var, mcordata):
             assert get_dim(h_fd) == get_dim(hist)
             hist.Add(h_fd, -1)
         else:
-            self.logger.error('No feeddown estimation available for %s (%s)', var, mcordata)
+            self.logger.error('No feed-down estimation available for %s (%s)', var, mcordata)

     #region unfolding

@@ -1043,7 +1043,7 @@ def _unfold(self, hist, var, mcordata):

         fh_unfolding_input = hist.Clone('fh_unfolding_input')
         if get_dim(fh_unfolding_input) != get_dim(h_effkine_det):
-            self.logger.error('histograms with different dimensions, cannot unfold')
+            self.logger.error('Histograms with different dimensions, cannot unfold')
             return []
         ensure_sumw2(fh_unfolding_input)
         fh_unfolding_input.Multiply(h_effkine_det)
diff --git a/machine_learning_hep/analysis/do_systematics.py b/machine_learning_hep/analysis/do_systematics.py
index 935edc99b8..16d5cc99a0 100644
--- a/machine_learning_hep/analysis/do_systematics.py
+++ b/machine_learning_hep/analysis/do_systematics.py
@@ -128,7 +128,7 @@ def __init__(self, path_database_analysis: str, typean: str, var: str):
             elif binning := self.cfg(f'observables.{var}.bins_fix'):
                 bins_tmp = bin_array(*binning)
             else:
-                self.logger.error('no binning specified for %s, using defaults', var)
+                self.logger.error('No binning specified for %s, using defaults', var)
                 bins_tmp = bin_array(10, 0., 1.)
             binning_obs_rec = bins_tmp
             self.n_bins_obs_rec = len(binning_obs_rec) - 1
@@ -151,7 +151,7 @@ def __init__(self, path_database_analysis: str, typean: str, var: str):
             elif binning := self.cfg(f'observables.{var}.bins_fix'):
                 bins_tmp = bin_array(*binning)
             else:
-                self.logger.error('no binning specified for %s, using defaults', var)
+                self.logger.error('No binning specified for %s, using defaults', var)
                 bins_tmp = bin_array(10, 0., 1.)
             binning_obs_gen = bins_tmp
             self.n_bins_obs_gen = len(binning_obs_gen) - 1
diff --git a/machine_learning_hep/globalfitter.py b/machine_learning_hep/globalfitter.py
index e0fe6d2bf5..986488ef41 100644
--- a/machine_learning_hep/globalfitter.py
+++ b/machine_learning_hep/globalfitter.py
@@ -230,7 +230,7 @@ def update_check_signal_fit(self):
         return ""

     def derive_yields(self):
-        self.logger.info("calculate signal, backgroud, S/B, significance")
+        self.logger.info("Calculate signal, background, S/B, significance")
         self.mean_fit = self.sig_fit_func.GetParameter(1)
         self.mean_err_fit = self.sig_fit_func.GetParError(1)
         # Could be negative together with the integral pre-factor
@@ -393,7 +393,7 @@ def fit(self):
         self.logger.info("Initial parameters for signal fit are")
         print(f"mean = {self.mean}\nsigma = {self.sigma}")

-        self.logger.debug("fit background (just side bands)")
+        self.logger.debug("Fit background (just sidebands)")
         self.histo_to_fit.Fit(self.bkg_sideband_fit_func, ("R,%s,+,0" % (self.fit_options)))

         # Prepare a function to store the signal parameters which will finally be extracted
@@ -413,7 +413,7 @@ def fit(self):
         self.sig_fit_func.SetParameter(1, self.mean)
         self.sig_fit_func.SetParameter(2, self.sigma)

-        self.logger.info("fit all (signal + background)")
+        self.logger.info("Fit all (signal + background)")
         self.tot_fit_func.SetLineColor(4)
         parmin = Double()
         parmax = Double()
diff --git a/machine_learning_hep/multiprocesser.py b/machine_learning_hep/multiprocesser.py
index 6cf88f206e..45b4b5e4bd 100755
--- a/machine_learning_hep/multiprocesser.py
+++ b/machine_learning_hep/multiprocesser.py
@@ -204,7 +204,7 @@ def multi_histomass(self):
         for indexp, _ in enumerate(self.process_listsample):
             if self.p_useperiod[indexp] == 1:
                 self.process_listsample[indexp].process_histomass()
-        self.logger.debug('merging all')
+        self.logger.debug('Merging all')
         with tempfile.TemporaryDirectory() as tmp_merged_dir:
             mergerootfiles(self.lper_filemass, self.filemass_mergedall, tmp_merged_dir)

diff --git a/machine_learning_hep/optimiser.py b/machine_learning_hep/optimiser.py
index feeadb2964..3ad7392ee6 100644
--- a/machine_learning_hep/optimiser.py
+++ b/machine_learning_hep/optimiser.py
@@ -215,7 +215,7 @@ def __init__(self, data_param, case, typean, model_config, binmin,
         self.f_mltest_applied = f"{self.dirmlout}/testsample_{self.s_suffix}_mldecision.pkl"
         self.df_mltest_applied = None

-        self.logger.info('training variables: %s', training_var)
+        self.logger.info('Training variables: %s', training_var)

     def create_suffix(self):
         string_selection = createstringselection(self.v_bin, self.p_binmin, self.p_binmax)
@@ -249,7 +249,7 @@ def prepare_data_mc_mcgen(self):

     def preparesample(self): # pylint: disable=too-many-branches
-        self.logger.info("Prepare Sample")
+        self.logger.info("Prepare sample")

         filename_train = \
             os.path.join(self.dirmlout, f"df_train_{self.p_binmin}_{self.p_binmax}.pkl")
diff --git a/machine_learning_hep/processer.py b/machine_learning_hep/processer.py
index d9b5e1113e..0ba89e63f3 100644
--- a/machine_learning_hep/processer.py
+++ b/machine_learning_hep/processer.py
@@ -328,7 +328,7 @@ def dfmerge(dfl, dfr, **kwargs):
             try:
                 return pd.merge(dfl, dfr, **kwargs)
             except Exception as e:
-                self.logger.error('merging failed: %s', str(e))
+                self.logger.error('Merging failed: %s', str(e))
                 dfl.info()
                 dfr.info()
                 raise e
@@ -339,20 +339,20 @@ def dfuse(df_spec):
                     (level in ('mc', 'gen', 'det') and self.mcordata == 'mc') or
                     (level in ('data') and self.mcordata == 'data'))

-        self.logger.info('unpacking: %s', self.l_root[file_index])
+        self.logger.info('Unpacking: %s', self.l_root[file_index])
         dfs = {}
         self.logger.debug(' -> reading')
         with uproot.open(self.l_root[file_index]) as rfile:
             df_processed = set()
             keys = rfile.keys(recursive=False, filter_name='DF_*')
-            self.logger.info('found %d dataframes, reading %s', len(keys), max_no_keys or "all")
+            self.logger.info('Found %d dataframes, reading %s', len(keys), max_no_keys or "all")
             for (idx, key) in enumerate(keys[:max_no_keys]):
                 if not (df_key := re.match('^DF_(\\d+);', key)):
                     continue
                 if (df_no := int(df_key.group(1))) in df_processed:
-                    self.logger.warning('multiple versions of DF %d', df_no)
+                    self.logger.warning('Multiple versions of DF %d', df_no)
                     continue
-                self.logger.debug('processing DF %d - %d / %d', df_no, idx, len(keys))
+                self.logger.debug('Processing DF %d - %d / %d', df_no, idx, len(keys))
                 df_processed.add(df_no)

                 rdir = rfile[key]
@@ -410,19 +410,19 @@ def dfuse(df_spec):
                 out = m_spec.get('out', base)
                 if all([dfuse(self.df_read[base]), dfuse(self.df_read[ref])]):
                     if (on := m_spec.get('use', None)) is not None:
-                        self.logger.info('merging %s with %s on %s into %s', base, ref, on, out)
+                        self.logger.info('Merging %s with %s on %s into %s', base, ref, on, out)
                         if not isinstance(on, list) or 'df' not in on:
                             on = ['df', on]
                         dfs[out] = dfmerge(dfs[base], dfs[ref], on=on)
                     elif (on := m_spec.get('left_on', None)) is not None:
-                        self.logger.info('merging %s with %s on %s into %s', base, ref, on, out)
+                        self.logger.info('Merging %s with %s on %s into %s', base, ref, on, out)
                         if not is_numeric_dtype(dfs[base][on]):
-                            self.logger.info('exploding dataframe %s on variable %s', base, on)
+                            self.logger.info('Exploding dataframe %s on variable %s', base, on)
                             dfs[base] = dfs[base].explode(on)
                         dfs[out] = dfmerge(dfs[base], dfs[ref], left_on=['df', on], right_index=True)
                     else:
                         var = self.df_read[ref]['index']
-                        self.logger.info('merging %s with %s on %s (default) into %s', base, ref, var, out)
+                        self.logger.info('Merging %s with %s on %s (default) into %s', base, ref, var, out)
                         dfs[out] = dfmerge(dfs[base], dfs[ref], left_on=['df', var], right_index=True)
                 if 'extra' in m_spec:
                     self.logger.debug('  %s -> extra', out)
@@ -432,7 +432,7 @@ def dfuse(df_spec):
         if self.df_write:
             for df_name, df_spec in self.df_write.items():
                 if dfuse(df_spec):
-                    self.logger.info('writing %s to %s', df_name, df_spec['file'])
+                    self.logger.info('Writing %s to %s', df_name, df_spec['file'])
                     src = df_spec.get('source', df_name)
                     dfo = dfquery(dfs[src], df_spec.get('filter', None))
                     path = os.path.join(self.d_pkl, self.l_path[file_index], df_spec['file'])
@@ -530,7 +530,7 @@ def process_applymodel_par(self):
         self.parallelizer(self.applymodel, arguments, self.p_chunksizeskim)

     def process_mergeforml(self):
-        self.logger.info("doing merging for ml %s %s", self.mcordata, self.period)
+        self.logger.info("Doing merging for ML %s %s", self.mcordata, self.period)
         indices_for_evt = []
         for ipt in range(self.p_nptbins):
             nfiles = len(self.mptfiles_recosk[ipt])
diff --git a/machine_learning_hep/processer_jet.py b/machine_learning_hep/processer_jet.py
index 5605efcb98..3fa3686094 100644
--- a/machine_learning_hep/processer_jet.py
+++ b/machine_learning_hep/processer_jet.py
@@ -76,7 +76,7 @@ def __init__(self, case, datap, run_param, mcordata, p_maxfiles, # pylint: disab
             elif binning := self.cfg(f'observables.{v}.bins_fix'):
                 self.binarrays_obs[level][v] = bin_array(*binning)
             else:
-                self.logger.error('no binning specified for %s, using defaults', v)
+                self.logger.error('No binning specified for %s, using defaults', v)
                 self.binarrays_obs[level][v] = bin_array(10, 0., 1.)

             if binning := self.cfg(f'observables.{v}.bins_ptjet'):
@@ -121,7 +121,7 @@ def _verify_variables(self, dfi):


     def _calculate_variables(self, df, verify=False): # pylint: disable=invalid-name
-        self.logger.info('calculating variables')
+        self.logger.info('Calculating variables')
         if len(df) == 0:
             return df
         df['dr'] = np.sqrt((df.fJetEta - df.fEta)**2 + ((df.fJetPhi - df.fPhi + math.pi) % math.tau - math.pi)**2)
@@ -150,7 +150,7 @@ def _calculate_variables(self, df, verify=False): # pylint: disable=invalid-name
             (lambda ar: np.log(ar.fPtSubLeading * np.sin(ar.fTheta))), axis=1)
         df['lntheta'] = df['fTheta'].apply(lambda x: -np.log(x))
         # df['lntheta'] = np.array(-np.log(df.fTheta))
-        self.logger.debug('done')
+        self.logger.debug('Done')
         if verify:
             self._verify_variables(df)
         return df
@@ -175,7 +175,7 @@ def process_histomass_single(self, index):
             dfcollcnt = read_df(self.l_collcnt[index])
             ser_collcnt = dfcollcnt[self.cfg('cnt_events_read', 'fReadSelectedCounts')]
             collcnt = functools.reduce(lambda x,y: float(x)+float(y), (ar[0] for ar in ser_collcnt))
-            self.logger.info('sampled %g collisions', collcnt)
+            self.logger.info('Sampled %g collisions', collcnt)
             histonorm.SetBinContent(2, collcnt)
         get_axis(histonorm, 0).SetBinLabel(1, 'N_{evt}')
         get_axis(histonorm, 0).SetBinLabel(2, 'N_{coll}')
@@ -225,10 +225,10 @@ def process_histomass_single(self, index):
             self._calculate_variables(df)

         for obs, spec in self.cfg('observables', {}).items():
-            self.logger.debug('preparing histograms for %s', obs)
+            self.logger.debug('Preparing histograms for %s', obs)
             var = obs.split('-')
             if not all(v in df for v in var):
-                self.logger.error('dataframe does not contain %s', var)
+                self.logger.error('Dataframe does not contain %s', var)
                 continue
             h = create_hist(
                 f'h_mass-ptjet-pthf-{obs}',
@@ -347,12 +347,12 @@ def process_efficiency_single(self, index):
                 if '-' in var or self.cfg(f'observables.{var}.arraycols'):
                     continue
                 if self.cfg('closure.use_matched'):
-                    self.logger.info('using matched for truth')
+                    self.logger.info('Using matched for truth')
                     df_mcana, _ = self.split_df(dfmatch[cat], self.cfg('frac_mcana', .2))
                 else:
                     df_mcana, _ = self.split_df(dfgen[cat], self.cfg('frac_mcana', .2))
                 if f := self.cfg('closure.exclude_feeddown_gen'):
-                    self.logger.debug('excluding feeddown gen')
+                    self.logger.debug('Excluding feeddown gen')
                     dfquery(df_mcana, f, inplace=True)
                 fill_hist(h_mctruth[(cat, var)], df_mcana[['fJetPt_gen', 'fPt_gen', f'{var}_gen']])

diff --git a/machine_learning_hep/steer_analysis.py b/machine_learning_hep/steer_analysis.py
index db1d74d139..5ea8b5445a 100644
--- a/machine_learning_hep/steer_analysis.py
+++ b/machine_learning_hep/steer_analysis.py
@@ -187,7 +187,7 @@ def do_entire_analysis(data_config: dict, data_param: dict, data_param_overwrite
         exdirs.extend(checkdirs(dirresultsdatatot))

     if len(exdirs) > 0:
-        logger.info('existing directories must be deleted')
+        logger.info('Existing directories must be deleted')
         for d in exdirs:
             print(f'rm -rf {d}')
         delete = False
diff --git a/machine_learning_hep/utilities.py b/machine_learning_hep/utilities.py
index 558df79fba..b6ad51f349 100644
--- a/machine_learning_hep/utilities.py
+++ b/machine_learning_hep/utilities.py
@@ -81,16 +81,16 @@ def openfile(filename, attr):


 def write_df(dfo, path):
-    logger.debug("writing df to <%s>", path)
+    logger.debug("Writing df to <%s>", path)
     if path.endswith(".parquet"):
         start = time.time()
         dfo.to_parquet(path)
-        logger.debug("written to parquet in %.2f s", time.time() - start)
+        logger.debug("Written to parquet in %.2f s", time.time() - start)
     else:
         start = time.time()
         with openfile(path, "wb") as file:
             pickle.dump(dfo, file, pickle.HIGHEST_PROTOCOL)
-        logger.debug("written to pickle in %.2f s", time.time() - start)
+        logger.debug("Written to pickle in %.2f s", time.time() - start)


 def read_df(path, **kwargs):
@@ -100,7 +100,7 @@ def read_df(path, **kwargs):
         else:
             df = pickle.load(openfile(path, "rb"))
     except Exception as e: # pylint: disable=broad-except
-        logger.critical("failed to open file <%s>: %s", path, str(e))
+        logger.critical("Failed to open file <%s>: %s", path, str(e))
         sys.exit()
     return df

diff --git a/machine_learning_hep/utilities_files.py b/machine_learning_hep/utilities_files.py
index a1db3cfd6a..93fc1d5f93 100644
--- a/machine_learning_hep/utilities_files.py
+++ b/machine_learning_hep/utilities_files.py
@@ -29,7 +29,7 @@ def list_folders(main_dir: str, filenameinput: str, maxfiles: int, select=None):
     :param select: iterable of substrings that must be contained in folders
     """
     if not os.path.isdir(main_dir):
-        logger.error("input directory <%s> does not exist", main_dir)
+        logger.error("Input directory <%s> does not exist", main_dir)

     files = glob.glob(f"{main_dir}/**/{filenameinput}", recursive=True)
     listfolders = [os.path.relpath(os.path.dirname(file), main_dir) for file in files]
@@ -78,7 +78,7 @@ def checkmakedir(mydir: str):
     if os.path.exists(mydir):
         logger.warning("Using existing folder %s", mydir)
         return
-    logger.debug("creating folder %s", mydir)
+    logger.debug("Creating folder %s", mydir)
     os.makedirs(mydir)


@@ -101,7 +101,7 @@ def delete_dir(path: str):
     try:
         shutil.rmtree(path)
     except OSError:
-        logger.error("Error: Failed to delete directory %s", path)
+        logger.error("Failed to delete directory %s", path)
         return False
     return True


From 2088899bf625509aaa7965871a8281d553b2f408 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?V=C3=ADt=20Ku=C4=8Dera?=
Date: Mon, 2 Sep 2024 14:29:17 +0200
Subject: [PATCH 2/2] Fix case of D0-jet DB

---
 .../data/data_run3/database_ml_parameters_D0Jet_pp.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/machine_learning_hep/data/data_run3/database_ml_parameters_D0Jet_pp.yml b/machine_learning_hep/data/data_run3/database_ml_parameters_D0Jet_pp.yml
index 88b52cae96..e1eb7997e1 100644
--- a/machine_learning_hep/data/data_run3/database_ml_parameters_D0Jet_pp.yml
+++ b/machine_learning_hep/data/data_run3/database_ml_parameters_D0Jet_pp.yml
@@ -11,7 +11,7 @@
 # You should have received a copy of the GNU General Public License           #
 # along with this program. if not, see <https://www.gnu.org/licenses/>.       #

-D0jet_pp:
+D0Jet_pp:
   doml: true
   mass: 1.864
   sel_reco_unp: "fPt > 1."
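
Note on the convention enforced by PATCH 1/2: log messages start with a capital
letter and keep the logging module's lazy %-formatting, so the string is only
interpolated when the record is actually emitted. A minimal, runnable sketch of
the style (the logger name and the values below are illustrative, not taken
from the repository):

    import logging

    logging.basicConfig(level=logging.DEBUG)
    logger = logging.getLogger('machine_learning_hep')  # illustrative name

    eff = 0.83  # illustrative efficiency value
    ipt = 2     # illustrative pt-bin index
    # Capitalized message, arguments passed separately: formatting happens
    # only if the DEBUG level is enabled for this logger.
    logger.debug('Scaling hist %s (ipt %i) with 1. / %g', 'h_mass', ipt, eff)
    # An f-string, by contrast, would be interpolated even when the record is
    # filtered out, which is why the messages above pass arguments lazily:
    # logger.debug(f'Scaling hist h_mass (ipt {ipt}) with 1. / {eff}')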