diff --git a/benchmarks/pandas/bench_cum_ops.py b/benchmarks/pandas/bench_cum_ops.py
new file mode 100644
index 00000000..d6d39409
--- /dev/null
+++ b/benchmarks/pandas/bench_cum_ops.py
@@ -0,0 +1,65 @@
+"""
+Benchmark: Series.cumsum / cumprod / cummax / cummin and DataFrame.cumsum on N float values.
+Mirrors tsb bench_cum_ops.ts. Prints one JSON object (function, mean_ms, iterations, total_ms) to stdout.
+"""
+import json
+import time
+import pandas as pd
+
+N = 100_000
+WARMUP = 5
+ITERS = 20
+
+# Matching data: values stay within 1..100 (col2 advances by 3 modulo 100); everything is stored as float
+data = [(i % 100) + 1 for i in range(N)]
+series = pd.Series(data, dtype=float)
+
+col1 = [(i % 100) + 1 for i in range(N)]
+col2 = [((i * 3) % 100) + 1 for i in range(N)]
+df = pd.DataFrame({"a": col1, "b": col2}, dtype=float)
+
+# Warm-up (not timed). NOTE(review): only cumsum, cummax and DataFrame.cumsum run here; cumprod and cummin are first called inside their measured loops
+for _ in range(WARMUP):
+    series.cumsum()
+    series.cummax()
+    df.cumsum()
+
+# Measured: ITERS calls of Series.cumsum; results are discarded, only elapsed time is kept
+t0 = time.perf_counter()
+for _ in range(ITERS):
+    series.cumsum()
+total_cumsum = (time.perf_counter() - t0) * 1000  # seconds -> milliseconds
+
+# Measured: ITERS calls of Series.cumprod (no warm-up call precedes this loop)
+t0 = time.perf_counter()
+for _ in range(ITERS):
+    series.cumprod()
+total_cumprod = (time.perf_counter() - t0) * 1000
+
+# Measured: ITERS calls of Series.cummax
+t0 = time.perf_counter()
+for _ in range(ITERS):
+    series.cummax()
+total_cummax = (time.perf_counter() - t0) * 1000
+
+# Measured: ITERS calls of Series.cummin (no warm-up call precedes this loop)
+t0 = time.perf_counter()
+for _ in range(ITERS):
+    series.cummin()
+total_cummin = (time.perf_counter() - t0) * 1000
+
+# Measured: ITERS calls of DataFrame.cumsum on the two-column frame
+t0 = time.perf_counter()
+for _ in range(ITERS):
+    df.cumsum()
+total_df = (time.perf_counter() - t0) * 1000
+
+total_ms = total_cumsum + total_cumprod + total_cummax + total_cummin + total_df  # sum of the five timed loops
+mean_ms = total_ms / (ITERS * 5)  # mean per call across all five operations (ITERS calls each)
+
+print(json.dumps({
+    "function": "cum_ops",
+    "mean_ms": round(mean_ms, 4),
+    "iterations": ITERS * 5,
+    "total_ms": round(total_ms, 4),
+}))
diff --git a/benchmarks/pandas/bench_replace.py b/benchmarks/pandas/bench_replace.py
new file mode 100644
index 00000000..87e3d17c
--- /dev/null
+++ b/benchmarks/pandas/bench_replace.py
@@ -0,0 +1,47 @@
+"""
+Benchmark: scalar Series.replace / DataFrame.replace on N values.
+Mirrors tsb bench_replace.ts. Prints one JSON object (function, mean_ms, iterations, total_ms) to stdout.
+"""
+import json
+import time
+import pandas as pd
+import numpy as np  # NOTE(review): np is not referenced anywhere in this file
+
+N = 100_000
+WARMUP = 5
+ITERS = 20
+
+# Build data matching the TypeScript benchmark: values cycle through 0..9 (col2 advances by 3 modulo 10); no dtype is passed to pandas
+data = [i % 10 for i in range(N)]
+series = pd.Series(data)
+
+col1 = [i % 10 for i in range(N)]
+col2 = [(i * 3) % 10 for i in range(N)]
+df = pd.DataFrame({"a": col1, "b": col2})
+
+# Warm-up (not timed): always replaces the fixed value 5 with 99
+for _ in range(WARMUP):
+    series.replace(5, 99)
+    df.replace(5, 99)
+
+# Measured: Series.replace scalar; the value to replace is i % 10, so it changes on every iteration
+t0 = time.perf_counter()
+for i in range(ITERS):
+    series.replace(i % 10, 99)
+total_series = (time.perf_counter() - t0) * 1000  # seconds -> milliseconds
+
+# Measured: DataFrame.replace scalar; same i % 10 rotation of the value to replace
+t0 = time.perf_counter()
+for i in range(ITERS):
+    df.replace(i % 10, 99)
+total_df = (time.perf_counter() - t0) * 1000
+
+total_ms = total_series + total_df  # sum of the two timed loops
+mean_ms = total_ms / (ITERS * 2)  # mean per call across both operations (ITERS calls each)
+
+print(json.dumps({
+    "function": "replace",
+    "mean_ms": round(mean_ms, 4),
+    "iterations": ITERS * 2,
+    "total_ms": round(total_ms, 4),
+}))
diff --git a/benchmarks/tsb/bench_cum_ops.ts b/benchmarks/tsb/bench_cum_ops.ts
new file mode 100644
index 00000000..5750c25f
--- /dev/null
+++ b/benchmarks/tsb/bench_cum_ops.ts
@@ -0,0 +1,70 @@
+/**
+ * Benchmark: cumsum / cumprod / cummax / cummin on a Series, plus dataFrameCumsum on a two-column DataFrame.
+ * Mirrors pandas Series.cumsum(), DataFrame.cumsum(), etc.; counterpart of benchmarks/pandas/bench_cum_ops.py. Logs one JSON object.
+ */
+import { Series, DataFrame } from "../../src/index.ts";
+import {
+  cumsum,
+  cumprod,
+  cummax,
+  cummin,
+  dataFrameCumsum,
+} from "../../src/stats/cum_ops.ts";
+
+const N = 100_000;
+
+// Numeric series for cumsum/cumprod/cummax/cummin: values cycle through 1..100
+const data = Array.from({ length: N }, (_, i) => (i % 100) + 1);
+const series = new Series({ data });
+
+// DataFrame with two columns: "a" repeats the series pattern, "b" advances by 3 modulo 100
+const col1 = Array.from({ length: N }, (_, i) => (i % 100) + 1);
+const col2 = Array.from({ length: N }, (_, i) => ((i * 3) % 100) + 1);
+const df = DataFrame.fromColumns({ a: col1, b: col2 });
+
+const WARMUP = 5;
+const ITERS = 20;
+
+// --- warm-up (not timed) --- NOTE(review): only cumsum, cummax and dataFrameCumsum run here; cumprod and cummin are first called inside their measured loops
+for (let i = 0; i < WARMUP; i++) {
+  cumsum(series);
+  cummax(series);
+  dataFrameCumsum(df);
+}
+
+// --- measured: cumsum --- ITERS calls; results are discarded, only elapsed time is kept
+const t0cs = performance.now();
+for (let i = 0; i < ITERS; i++) cumsum(series);
+const totalCumsum = performance.now() - t0cs;
+
+// --- measured: cumprod --- (no warm-up call precedes this loop)
+const t0cp = performance.now();
+for (let i = 0; i < ITERS; i++) cumprod(series);
+const totalCumprod = performance.now() - t0cp;
+
+// --- measured: cummax ---
+const t0cx = performance.now();
+for (let i = 0; i < ITERS; i++) cummax(series);
+const totalCummax = performance.now() - t0cx;
+
+// --- measured: cummin --- (no warm-up call precedes this loop)
+const t0cn = performance.now();
+for (let i = 0; i < ITERS; i++) cummin(series);
+const totalCummin = performance.now() - t0cn;
+
+// --- measured: dataFrameCumsum --- ITERS calls on the two-column frame
+const t0df = performance.now();
+for (let i = 0; i < ITERS; i++) dataFrameCumsum(df);
+const totalDf = performance.now() - t0df;
+
+const total_ms = totalCumsum + totalCumprod + totalCummax + totalCummin + totalDf; // sum of the five timed loops
+const mean_ms = total_ms / (ITERS * 5); // mean per call across all five operations (ITERS calls each)
+
+console.log(
+  JSON.stringify({
+    function: "cum_ops",
+    mean_ms: parseFloat(mean_ms.toFixed(4)),
+    iterations: ITERS * 5,
+    total_ms: parseFloat(total_ms.toFixed(4)),
+  }),
+);
diff --git a/benchmarks/tsb/bench_replace.ts b/benchmarks/tsb/bench_replace.ts
new file mode 100644
index 00000000..e23c9aad
--- /dev/null
+++ b/benchmarks/tsb/bench_replace.ts
@@ -0,0 +1,53 @@
+/**
+ * Benchmark: scalar replaceSeries / replaceDataFrame on N values; logs one JSON object.
+ * Mirrors pandas Series.replace() and DataFrame.replace(); counterpart of benchmarks/pandas/bench_replace.py.
+ */
+import { Series, DataFrame } from "../../src/index.ts";
+import { replaceSeries, replaceDataFrame } from "../../src/stats/replace.ts";
+
+const N = 100_000;
+
+// Build a numeric series with values 0–9 (cycled) for scalar replace
+const data = Array.from({ length: N }, (_, i) => i % 10);
+const series = new Series({ data });
+
+// Build a DataFrame with two numeric columns: "a" cycles 0–9, "b" advances by 3 modulo 10
+const col1 = Array.from({ length: N }, (_, i) => i % 10);
+const col2 = Array.from({ length: N }, (_, i) => (i * 3) % 10);
+const df = DataFrame.fromColumns({ a: col1, b: col2 });
+
+const WARMUP = 5;
+const ITERS = 20;
+
+// --- warm-up (not timed) --- always replaces the fixed value 5 with 99
+for (let i = 0; i < WARMUP; i++) {
+  replaceSeries(series, { toReplace: 5, value: 99 });
+  replaceDataFrame(df, { toReplace: 5, value: 99 });
+}
+
+// --- measured: replaceSeries scalar --- toReplace is i % 10, so it changes on every iteration
+const t0s = performance.now();
+for (let i = 0; i < ITERS; i++) {
+  replaceSeries(series, { toReplace: i % 10, value: 99 });
+}
+const totalSeries = performance.now() - t0s;
+
+// --- measured: replaceDataFrame scalar --- same i % 10 rotation of toReplace
+const t0d = performance.now();
+for (let i = 0; i < ITERS; i++) {
+  replaceDataFrame(df, { toReplace: i % 10, value: 99 });
+}
+const totalDf = performance.now() - t0d;
+
+// Report the mean per call over both timed loops (ITERS calls each, ITERS * 2 in total)
+const total_ms = totalSeries + totalDf;
+const mean_ms = total_ms / (ITERS * 2);
+
+console.log(
+  JSON.stringify({
+    function: "replace",
+    mean_ms: parseFloat(mean_ms.toFixed(4)),
+    iterations: ITERS * 2,
+    total_ms: parseFloat(total_ms.toFixed(4)),
+  }),
+);