diff --git a/benchmarks/pandas/bench_dropna.py b/benchmarks/pandas/bench_dropna.py
new file mode 100644
index 00000000..4ccb372e
--- /dev/null
+++ b/benchmarks/pandas/bench_dropna.py
@@ -0,0 +1,48 @@
+"""
+Benchmark: dropna on Series and DataFrame (axis=0, how=any, how=all)
+"""
+import json
+import time
+import numpy as np
+import pandas as pd
+
+ROWS = 100_000
+WARMUP = 3
+ITERATIONS = 10
+
+rng = np.random.default_rng(42)
+series_data = rng.standard_normal(ROWS)
+# Every 10th element is NaN (~10% missing).
+series_data[::10] = np.nan
+s = pd.Series(series_data)
+
+col_a = rng.standard_normal(ROWS)
+col_b = rng.standard_normal(ROWS)
+col_c = rng.standard_normal(ROWS)
+# Each column gets NaN at a different stride (7, 11, 13).
+col_a[::7] = np.nan
+col_b[::11] = np.nan
+col_c[::13] = np.nan
+df = pd.DataFrame({"a": col_a, "b": col_b, "c": col_c})
+
+# Untimed warm-up passes.
+for _ in range(WARMUP):
+    s.dropna()
+    df.dropna(how="any")
+    df.dropna(how="all")
+
+# Timed section: every iteration runs all three dropna calls.
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+    s.dropna()
+    df.dropna(how="any")
+    df.dropna(how="all")
+total = (time.perf_counter() - start) * 1000  # seconds -> milliseconds
+
+# mean_ms is the average per iteration, i.e. for the three calls together.
+print(json.dumps({
+    "function": "dropna",
+    "mean_ms": total / ITERATIONS,
+    "iterations": ITERATIONS,
+    "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_fillna.py b/benchmarks/pandas/bench_fillna.py
new file mode 100644
index 00000000..04c60156
--- /dev/null
+++ b/benchmarks/pandas/bench_fillna.py
@@ -0,0 +1,48 @@
+"""
+Benchmark: fillna on Series and DataFrame (scalar, ffill, bfill)
+"""
+import json
+import time
+import numpy as np
+import pandas as pd
+
+ROWS = 100_000
+WARMUP = 3
+ITERATIONS = 10
+
+rng = np.random.default_rng(42)
+series_data = rng.standard_normal(ROWS)
+# Every 10th element is NaN (~10% missing).
+series_data[::10] = np.nan
+s = pd.Series(series_data)
+
+col_a = rng.standard_normal(ROWS)
+col_b = rng.standard_normal(ROWS)
+# Each column gets NaN at a different stride (7, 11).
+col_a[::7] = np.nan
+col_b[::11] = np.nan
+df = pd.DataFrame({"a": col_a, "b": col_b})
+
+# Untimed warm-up passes.
+for _ in range(WARMUP):
+    s.fillna(0)
+    s.ffill()
+    df.fillna(0)
+    df.bfill()
+
+# Timed section: every iteration runs all four fill calls.
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+    s.fillna(0)
+    s.ffill()
+    df.fillna(0)
+    df.bfill()
+total = (time.perf_counter() - start) * 1000  # seconds -> milliseconds
+
+# mean_ms is the average per iteration, i.e. for the four calls together.
+print(json.dumps({
+    "function": "fillna",
+    "mean_ms": total / ITERATIONS,
+    "iterations": ITERATIONS,
+    "total_ms": total,
+}))
diff --git a/benchmarks/tsb/bench_dropna.ts b/benchmarks/tsb/bench_dropna.ts
new file mode 100644
index 00000000..ecad34e7
--- /dev/null
+++ b/benchmarks/tsb/bench_dropna.ts
@@ -0,0 +1,46 @@
+/**
+ * Benchmark: dropna on Series and DataFrame (how="any" and how="all")
+ */
+import { Series, DataFrame, dropna, dropnaDataFrame } from "../../src/index.js";
+
+const ROWS = 100_000;
+const WARMUP = 3;
+const ITERATIONS = 10;
+
+// ~10% missing values
+const seriesData = Float64Array.from({ length: ROWS }, (_, i) =>
+  i % 10 === 0 ? NaN : i * 1.1,
+);
+const s = new Series(seriesData);
+
+// Each column gets NaN at a different stride (7, 11, 13).
+const colA = Float64Array.from({ length: ROWS }, (_, i) => (i % 7 === 0 ? NaN : i * 0.5));
+const colB = Float64Array.from({ length: ROWS }, (_, i) => (i % 11 === 0 ? NaN : i * 1.5));
+const colC = Float64Array.from({ length: ROWS }, (_, i) => (i % 13 === 0 ? NaN : i * 2.0));
+const df = DataFrame.fromColumns({ a: colA, b: colB, c: colC });
+
+// Untimed warm-up passes.
+for (let i = 0; i < WARMUP; i++) {
+  dropna(s);
+  dropnaDataFrame(df, { how: "any" });
+  dropnaDataFrame(df, { how: "all" });
+}
+
+// Timed section: every iteration runs all three dropna calls.
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) {
+  dropna(s);
+  dropnaDataFrame(df, { how: "any" });
+  dropnaDataFrame(df, { how: "all" });
+}
+const total = performance.now() - start; // performance.now() is already in milliseconds
+
+// mean_ms is the average per iteration, i.e. for the three calls together.
+console.log(
+  JSON.stringify({
+    function: "dropna",
+    mean_ms: total / ITERATIONS,
+    iterations: ITERATIONS,
+    total_ms: total,
+  }),
+);
diff --git a/benchmarks/tsb/bench_fillna.ts b/benchmarks/tsb/bench_fillna.ts
new file mode 100644
index 00000000..f56193a7
--- /dev/null
+++ b/benchmarks/tsb/bench_fillna.ts
@@ -0,0 +1,47 @@
+/**
+ * Benchmark: fillna on Series and DataFrame (scalar, ffill, bfill)
+ */
+import { Series, DataFrame, fillnaSeries, fillnaDataFrame } from "../../src/index.js";
+
+const ROWS = 100_000;
+const WARMUP = 3;
+const ITERATIONS = 10;
+
+// ~10% missing values
+const seriesData = Float64Array.from({ length: ROWS }, (_, i) =>
+  i % 10 === 0 ? NaN : i * 1.1,
+);
+const s = new Series(seriesData);
+
+// Each column gets NaN at a different stride (7, 11).
+const colA = Float64Array.from({ length: ROWS }, (_, i) => (i % 7 === 0 ? NaN : i * 0.5));
+const colB = Float64Array.from({ length: ROWS }, (_, i) => (i % 11 === 0 ? NaN : i * 1.5));
+const df = DataFrame.fromColumns({ a: colA, b: colB });
+
+// Untimed warm-up passes.
+for (let i = 0; i < WARMUP; i++) {
+  fillnaSeries(s, { value: 0 });
+  fillnaSeries(s, { method: "ffill" });
+  fillnaDataFrame(df, { value: 0 });
+  fillnaDataFrame(df, { method: "bfill" });
+}
+
+// Timed section: every iteration runs all four fill calls.
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) {
+  fillnaSeries(s, { value: 0 });
+  fillnaSeries(s, { method: "ffill" });
+  fillnaDataFrame(df, { value: 0 });
+  fillnaDataFrame(df, { method: "bfill" });
+}
+const total = performance.now() - start; // performance.now() is already in milliseconds
+
+// mean_ms is the average per iteration, i.e. for the four calls together.
+console.log(
+  JSON.stringify({
+    function: "fillna",
+    mean_ms: total / ITERATIONS,
+    iterations: ITERATIONS,
+    total_ms: total,
+  }),
+);