Skip to content

Commit

Permalink
Add report of Pbench Agent version statistics (#3624)
Browse files Browse the repository at this point in the history
* Add report of Pbench Agent version statistics

Add a new `--agent` option and generator to report on the Pbench Agent
versions present on the server. By default we report on the "main"
versions, like "0.50" and "0.73"; by adding `--detail` it'll report the
much longer list of full agent versions ("v0.73-2g6da0cfc8d") as well as
"nonsense" version metadata like "system".

This also recognizes the `--since` and `--until` options to report on
agent versions that appear within a specific time range.

The report is sorted by the last date a given version was seen, which
makes it a bit easier to see that only 0.73 has been used since March 08 2024.

```console
Dataset statistics by Pbench Agent version:
    Count           Version            First         Last
  ---------- ---------------------- ------------ ------------
           1         '0.37'          Apr 13 2012  Apr 13 2012
          68         '0.44'          Jan 04 2018  Feb 06 2018
          84         '0.46'          Jan 03 2018  Mar 09 2018
       1,341         '0.47'          Jan 02 2018  Apr 03 2018
       2,197         '0.49'          Mar 21 2018  Aug 04 2018
       1,388         '0.48'          Feb 06 2018  Aug 14 2018
         171         '0.51'          Aug 10 2018  Aug 31 2018
       4,962         '0.50'          May 11 2018  Sep 25 2018
         494         '0.52'          Aug 24 2018  Jan 02 2019
       1,942         '0.53'          Sep 13 2018  May 29 2019
         898         '0.58'          Apr 08 2019  May 30 2019
         246         '0.55'          Jan 28 2019  Jun 06 2019
       1,205         '0.54'          Nov 27 2018  Jul 01 2019
           1         '0.61'          Jul 08 2019  Jul 08 2019
         532         '0.57'          Mar 15 2019  Aug 28 2019
         382         '0.62'          Jul 17 2019  Sep 10 2019
       1,426         '0.56'          Feb 11 2019  Oct 16 2019
       1,067         '0.59'          Apr 30 2019  Nov 12 2019
       1,454         '0.63'          Jul 31 2019  Dec 18 2019
       2,151         '0.65'          Sep 27 2019  Feb 21 2020
       1,342         '0.64'          Aug 27 2019  Mar 26 2020
       1,587         '0.60'          May 25 2019  May 22 2020
       5,255         '0.66'          Nov 07 2019  Jul 10 2020
       4,596         '0.67'          Jan 16 2020  Nov 30 2020
          33         '0.70'          Nov 18 2020  Jan 12 2021
       7,427         '0.68'          Apr 01 2020  Apr 27 2021
      54,179         '0.69'          Jun 25 2020  Mar 08 2023
      44,870         '0.71'          Oct 17 2020       Feb 28
       7,073         '0.72'          Jun 24 2022       Mar 08
       3,977         '0.73'          Aug 14 2023        today
```

I won't capture the full list here (it's much longer), but the "nonsense"
version report is currently:

```console
Datasets with nonsensical version metadata:
    Count           Version            First         Last
  ---------- ---------------------- ------------ ------------
          37        'system'         Mar 30 2019  Apr 01 2019
          54       'plugins:'        Jan 26 2018  Apr 27 2021
           5           ''            Oct 02 2018  Dec 20 2021
           3 'v(unknown)-g(unknown)'  Dec 14 2020  Sep 30 2022
```

* Tweaks
  • Loading branch information
dbutenhof authored Jul 22, 2024
1 parent a7419ce commit 17bff6b
Show file tree
Hide file tree
Showing 2 changed files with 128 additions and 3 deletions.
130 changes: 127 additions & 3 deletions lib/pbench/cli/server/report.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from collections import defaultdict
from dataclasses import dataclass
import datetime
from operator import and_
from pathlib import Path
Expand Down Expand Up @@ -384,9 +385,6 @@ def summarize_dates(base_query: Query, options: dict[str, Any]):
since = options.get("since")
until = options.get("until")

if since and until and since > until:
raise Exception("The --until value must be later than the --since value")

by_year = defaultdict(int)
by_month = defaultdict(int)
by_day = defaultdict(int)
Expand Down Expand Up @@ -514,6 +512,118 @@ def report_uploads(options: dict[str, Any]):
summarize_dates(rows, options)


def report_agent(options: dict[str, Any]):
    """Report dataset statistics by Pbench Agent version.

    Streams the (date, rpm-version) pairs found under each "metalog"
    metadata key, optionally limited to a time window, and prints a table
    giving the number of datasets for each agent version along with the
    first and last date that version was seen. Rows are ordered by the
    last-seen date.

    Version strings matching the version pattern are summarized by their
    "<major>.<minor>" prefix. With the "detail" option, the full version
    strings and the non-matching ("nonsensical") version strings are
    reported as well.

    Args:
        options: command options. This reads "since" and "until" (optional
            bounds on the dataset date, both inclusive) and "detail"
            (enables the two longer reports).
    """

    # The pattern is applied with search(), so it can match anywhere in the
    # version string (e.g., after a leading "v").
    #
    # NOTE(review): the trailing "-suffix" group is mandatory, so a bare
    # "0.73" is classified as a nonsensical version -- confirm that this is
    # the intent.
    v_pattern = re.compile(r"(?P<major>\d+\.\d+)(?:\.\d+)?(?:-\w+)")

    @dataclass
    class Daterange:
        """Track the earliest and latest dates seen."""

        first: Optional[datetime.datetime] = None
        last: Optional[datetime.datetime] = None

        def add(self, date: datetime.datetime):
            """Widen the range, if necessary, to include the date."""
            if self.first is None or date < self.first:
                self.first = date
            if self.last is None or date > self.last:
                self.last = date

    def print_versions(
        target: dict[str, Daterange], counts: dict[str, int], quote: bool = False
    ):
        """Print a table of versions ordered by the last date seen.

        Args:
            target: the date range observed for each version string
            counts: the number of datasets for each version string
            quote: wrap each version in single quotes, which makes empty or
                whitespace-laden strings visible
        """
        # Column widths: count, version, and the two date columns
        cw = 10
        vw = 23
        dw = 11
        click.echo(
            f" {'Count':^{cw}s} {'Version':^{vw}s} {'First':^{dw}s} {'Last':^{dw}s}"
        )
        click.echo(f" {'':-<{cw}} {'':-<{vw}} {'':-<{dw}} {'':-<{dw}}")
        for version, dates in sorted(target.items(), key=lambda k: k[1].last):
            count = counts[version]
            first = humanize.naturaldate(dates.first)
            last = humanize.naturaldate(dates.last)
            v = "'" + version + "'" if quote else version
            click.echo(f" {count:>{cw},d} {v:^{vw}s} {first:>{dw}s} {last:>{dw}s}")

    watcher.update("analyzing version patterns")
    since = options.get("since")
    until = options.get("until")

    # Create a subquery from our basic select parameters so that we can use
    # the label (SQL "AS date") in our WHERE filter clauses. (In a direct query
    # PostgreSQL doesn't allow filtering on renamed columns.)
    subquery = (
        Database.db_session.query(
            cast(Metadata.value["pbench", "date"].as_string(), TZDateTime).label(
                "date"
            ),
            Metadata.value["pbench", "rpm-version"].as_string().label("version"),
        )
        .filter(Metadata.key == "metalog")
        .subquery()
    )

    dateless = 0
    versionless = 0

    # Dataset counts and observed date ranges, keyed by the full version
    # string, by the "<major>.<minor>" prefix, and by unrecognized strings.
    versions = defaultdict(int)
    majorversions = defaultdict(int)
    nonversions = defaultdict(int)
    versionrange = defaultdict(Daterange)
    majorrange = defaultdict(Daterange)
    nonversionrange = defaultdict(Daterange)

    filters = []

    query = Database.db_session.query(subquery.c.date, subquery.c.version).order_by(
        subquery.c.date
    )

    if since:
        verifier.status(f"Filter since {since}")
        filters.append(subquery.c.date >= since)
    if until:
        verifier.status(f"Filter until {until}")
        filters.append(subquery.c.date <= until)
    if filters:
        query = query.filter(*filters)

    # Stream the results in chunks rather than loading every row at once
    rows = query.execution_options(stream_results=True).yield_per(SQL_CHUNK)

    for row in rows:
        date: datetime.datetime = row[0]
        version = row[1]
        if not isinstance(version, str):
            # Without a version string there's nothing to classify
            versionless += 1
            continue
        if not isinstance(date, datetime.datetime):
            # Count the dataset under its version anyway, using the epoch as
            # a placeholder date.
            #
            # NOTE(review): this placeholder is a naive datetime; if the
            # query returns timezone-aware values, comparing the two in
            # Daterange.add() would raise TypeError -- confirm.
            dateless += 1
            date = datetime.datetime.fromtimestamp(0.0)
        m = v_pattern.search(version)
        if m:
            maj = m.group("major")
            versions[version] += 1
            majorversions[maj] += 1
            versionrange[version].add(date)
            majorrange[maj].add(date)
        else:
            nonversions[version] += 1
            nonversionrange[version].add(date)

    click.echo("Dataset statistics by Pbench Agent version:")
    print_versions(majorrange, majorversions)
    if options.get("detail"):
        click.echo("Dataset statistics by full Pbench Agent version:")
        print_versions(versionrange, versions)
        click.echo("Datasets with nonsensical version metadata:")
        print_versions(nonversionrange, nonversions, quote=True)
    if dateless:
        click.echo(f"{dateless:,d} datasets lack a date")
    if versionless:
        click.echo(f"{versionless:,d} datasets lack a Pbench Agent version")


def report_audit():
"""Report audit log statistics."""

Expand Down Expand Up @@ -693,6 +803,12 @@ def report_states():

@click.command(name="pbench-report-generator")
@pass_cli_context
@click.option(
"--agent",
default=False,
is_flag=True,
help="Display Pbench Agent version statistics",
)
@click.option("--all", "-a", default=False, is_flag=True, help="Display full report")
@click.option(
"--archive", "-A", default=False, is_flag=True, help="Display archive statistics"
Expand Down Expand Up @@ -766,6 +882,12 @@ def report(context: object, **kwargs):
rv = 0

try:

since = kwargs.get("since")
until = kwargs.get("until")
if since and until and since > until:
raise Exception("The --until value must be later than the --since value")

config = config_setup(context)
logger = get_pbench_logger("pbench-report-generator", config)
cache_m = CacheManager(config, logger)
Expand All @@ -790,6 +912,8 @@ def report(context: object, **kwargs):
else:
click.echo(f"Unexpected statistics option {stats}", err=True)
rv = 1
if kwargs.get("all") or kwargs.get("agent"):
report_agent(kwargs)
if kwargs.get("all") or kwargs.get("audit"):
report_audit()
if kwargs.get("all") or kwargs.get("sql"):
Expand Down
1 change: 1 addition & 0 deletions server/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ flask-sqlalchemy
gunicorn
humanfriendly
humanize
numpy<2.0 # Indirect: elasticsearch
pquisby
psycopg2
pyesbulk>=2.0.1
Expand Down

0 comments on commit 17bff6b

Please sign in to comment.