Skip to content

Commit

Permalink
[backport] Fix using categorical data with the ranker. (#9753) (#9778)
Browse files Browse the repository at this point in the history
  • Loading branch information
trivialfis authored Nov 8, 2023
1 parent a408254 commit 0ffc52e
Show file tree
Hide file tree
Showing 4 changed files with 48 additions and 3 deletions.
12 changes: 11 additions & 1 deletion python-package/xgboost/sklearn.py
Original file line number Diff line number Diff line change
Expand Up @@ -2093,7 +2093,17 @@ def score(self, X: ArrayLike, y: ArrayLike) -> float:
"""
X, qid = _get_qid(X, None)
Xyq = DMatrix(X, y, qid=qid)
# fixme(jiamingy): base margin and group weight are not yet supported. We might
# need to add extra special fields to the dataframe.
Xyq = DMatrix(
X,
y,
qid=qid,
missing=self.missing,
enable_categorical=self.enable_categorical,
nthread=self.n_jobs,
feature_types=self.feature_types,
)
if callable(self.eval_metric):
metric = ltr_metric_decorator(self.eval_metric, self.n_jobs)
result_str = self.get_booster().eval_set([(Xyq, "eval")], feval=metric)
Expand Down
25 changes: 25 additions & 0 deletions python-package/xgboost/testing/ranking.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,3 +75,28 @@ def neg_mse(*args: Any, **kwargs: Any) -> float:

with pytest.raises(ValueError, match="Either `group` or `qid`."):
ranker.fit(df, y, eval_set=[(X, y)])


def run_ranking_categorical(device: str) -> None:
    """Exercise learning-to-rank with categorical features.

    Fits an ``XGBRanker`` with ``enable_categorical=True`` on a generated
    categorical dataset, checks the score on the training data, and then
    checks that the estimator can be scored through scikit-learn's
    ``cross_val_score``.

    Parameters
    ----------
    device :
        Forwarded unchanged as the ``device`` argument of ``XGBRanker``.
    """
    from sklearn.model_selection import cross_val_score

    X, y = tm.make_categorical(
        n_samples=512, n_features=10, n_categories=3, onehot=False
    )

    # Draw one of three query ids per row, then sort them so that the rows of
    # each query are contiguous. The ids are stored as a ``qid`` column of X.
    rng = np.random.default_rng(1994)
    X["qid"] = np.sort(rng.choice(3, size=y.shape[0]))

    ranker = xgb.XGBRanker(enable_categorical=True, device=device)
    ranker.fit(X, y)
    assert ranker.score(X, y) > 0.9

    # test using the score function inside sklearn, starting from a fresh,
    # unfitted estimator.
    fold_scores = cross_val_score(
        xgb.XGBRanker(enable_categorical=True, device=device), X, y
    )
    assert all(fold_score > 0.7 for fold_score in fold_scores)
7 changes: 6 additions & 1 deletion tests/python-gpu/test_gpu_with_sklearn.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

import xgboost as xgb
from xgboost import testing as tm
from xgboost.testing.ranking import run_ranking_qid_df
from xgboost.testing.ranking import run_ranking_categorical, run_ranking_qid_df

sys.path.append("tests/python")
import test_with_sklearn as twskl # noqa
Expand Down Expand Up @@ -165,6 +165,11 @@ def test_ranking_qid_df():
run_ranking_qid_df(cudf, "gpu_hist")


@pytest.mark.skipif(**tm.no_pandas())
def test_ranking_categorical() -> None:
    """Run the shared categorical LTR check with ``device="cuda"``."""
    run_ranking_categorical("cuda")


@pytest.mark.skipif(**tm.no_cupy())
@pytest.mark.mgpu
def test_device_ordinal() -> None:
Expand Down
7 changes: 6 additions & 1 deletion tests/python/test_with_sklearn.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

import xgboost as xgb
from xgboost import testing as tm
from xgboost.testing.ranking import run_ranking_qid_df
from xgboost.testing.ranking import run_ranking_categorical, run_ranking_qid_df
from xgboost.testing.shared import get_feature_weights, validate_data_initialization
from xgboost.testing.updater import get_basescore

Expand Down Expand Up @@ -173,6 +173,11 @@ def test_ranking():
np.testing.assert_almost_equal(pred, pred_orig)


@pytest.mark.skipif(**tm.no_pandas())
def test_ranking_categorical() -> None:
    """Run the shared categorical LTR check with ``device="cpu"``."""
    run_ranking_categorical("cpu")


def test_ranking_metric() -> None:
from sklearn.metrics import roc_auc_score

Expand Down

0 comments on commit 0ffc52e

Please sign in to comment.