From e9e9d6540c4e9feaf75db3f326e5ebf25356ee20 Mon Sep 17 00:00:00 2001
From: Tom White <tom.e.white@gmail.com>
Date: Thu, 19 Sep 2024 09:49:13 +0100
Subject: [PATCH 1/6] Install dask[dataframe] explicitly to fix upstream error

---
 .github/scripts/upstream_install.py | 2 +-
 requirements.txt                    | 2 +-
 setup.cfg                           | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/scripts/upstream_install.py b/.github/scripts/upstream_install.py
index 334795790..87f989d7d 100644
--- a/.github/scripts/upstream_install.py
+++ b/.github/scripts/upstream_install.py
@@ -14,7 +14,7 @@ def install_deps() -> None:
         "--upgrade",
     )
     upstream_deps = (
-        "git+https://github.com/dask/dask.git#egg=dask[array]",
+        "git+https://github.com/dask/dask.git#egg=dask[array,dataframe]",
         "git+https://github.com/dask/distributed.git#egg=distributed",
         "git+https://github.com/dask/dask-ml.git#egg=dask-ml",
         "git+https://github.com/pandas-dev/pandas#egg=pandas",
diff --git a/requirements.txt b/requirements.txt
index dcc24d89b..db170a79f 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,6 +1,6 @@
 numpy < 2
 xarray
-dask[array] >= 2023.01.0, <= 2024.8.0
+dask[array,dataframe] >= 2023.01.0, <= 2024.8.0
 distributed >= 2023.01.0, <= 2024.8.0
 dask-ml
 scipy
diff --git a/setup.cfg b/setup.cfg
index 8bacbf5f7..42aa1afac 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -30,7 +30,7 @@ python_requires = >=3.9
 install_requires =
     numpy < 2
     xarray
-    dask[array] >= 2022.01.0, <= 2024.8.0
+    dask[array,dataframe] >= 2022.01.0, <= 2024.8.0
     distributed >= 2022.01.0, <= 2024.8.0
     dask-ml
     scipy

From 34ac4ee74087b37b44a0a8b4705fb20ecfbc366c Mon Sep 17 00:00:00 2001
From: Tom White <tom.e.white@gmail.com>
Date: Thu, 19 Sep 2024 09:51:54 +0100
Subject: [PATCH 2/6] Temp change to run on PR

---
 .github/workflows/upstream.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/upstream.yml b/.github/workflows/upstream.yml
index c4a663137..48fd60173 100644
--- a/.github/workflows/upstream.yml
+++ b/.github/workflows/upstream.yml
@@ -1,6 +1,7 @@
 name: Upstream
 
 on:
+  pull_request:
   push:
   schedule:
     - cron: "0 1 * * *"

From bc9d699c7b200ada1ba21087105acbdb34ff2f02 Mon Sep 17 00:00:00 2001
From: Tom White <tom.e.white@gmail.com>
Date: Mon, 28 Oct 2024 09:31:39 +0000
Subject: [PATCH 3/6] Change minimum Python version of upstream CI to 3.11 to
 match Zarr

---
 .github/workflows/upstream.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/upstream.yml b/.github/workflows/upstream.yml
index 48fd60173..ef3ccfa2d 100644
--- a/.github/workflows/upstream.yml
+++ b/.github/workflows/upstream.yml
@@ -15,7 +15,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ["3.10", "3.11"]
+        python-version: ["3.11", "3.12"]
 
     steps:
     - uses: actions/checkout@v2

From 54986078bc779ceacd72678fa0c8f23db1a3b851 Mon Sep 17 00:00:00 2001
From: Tom White <tom.e.white@gmail.com>
Date: Mon, 28 Oct 2024 09:32:56 +0000
Subject: [PATCH 4/6] Unpin Dask since slowdown issue was addressed in
 2024.10.0

---
 requirements-numpy2.txt | 4 ++--
 requirements.txt        | 4 ++--
 setup.cfg               | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/requirements-numpy2.txt b/requirements-numpy2.txt
index 491e63fb8..16d16f990 100644
--- a/requirements-numpy2.txt
+++ b/requirements-numpy2.txt
@@ -1,7 +1,7 @@
 numpy < 2.1
 xarray
-dask[array] >= 2023.01.0, <= 2024.8.0
-distributed >= 2023.01.0, <= 2024.8.0
+dask[array] >= 2023.01.0, != 2024.8.1, != 2024.9.*
+distributed >= 2023.01.0, != 2024.8.1, != 2024.9.*
 dask-ml
 scipy
 typing-extensions
diff --git a/requirements.txt b/requirements.txt
index db170a79f..eb117e179 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,7 +1,7 @@
 numpy < 2
 xarray
-dask[array,dataframe] >= 2023.01.0, <= 2024.8.0
-distributed >= 2023.01.0, <= 2024.8.0
+dask[array,dataframe] >= 2023.01.0, != 2024.8.1, != 2024.9.*
+distributed >= 2023.01.0, != 2024.8.1, != 2024.9.*
 dask-ml
 scipy
 typing-extensions
diff --git a/setup.cfg b/setup.cfg
index 42aa1afac..fa506621d 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -30,8 +30,8 @@ python_requires = >=3.9
 install_requires =
     numpy < 2
     xarray
-    dask[array,dataframe] >= 2022.01.0, <= 2024.8.0
-    distributed >= 2022.01.0, <= 2024.8.0
+    dask[array,dataframe] >= 2022.01.0, != 2024.8.1, != 2024.9.*
+    distributed >= 2022.01.0, != 2024.8.1, != 2024.9.*
     dask-ml
     scipy
     zarr >= 2.10.0, != 2.11.0, != 2.11.1, != 2.11.2, < 3

From 5baef495b44ab0417dfe786e125bd6079e5a7323 Mon Sep 17 00:00:00 2001
From: Tom White <tom.e.white@gmail.com>
Date: Mon, 28 Oct 2024 09:41:37 +0000
Subject: [PATCH 5/6] Don't run upstream CI on Python 3.12 due to cbgen
 incompatibility

---
 .github/workflows/upstream.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/upstream.yml b/.github/workflows/upstream.yml
index ef3ccfa2d..b3e2a5b4d 100644
--- a/.github/workflows/upstream.yml
+++ b/.github/workflows/upstream.yml
@@ -15,7 +15,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ["3.11", "3.12"]
+        python-version: ["3.11"]
 
     steps:
     - uses: actions/checkout@v2

From e83b52cdf1ef1b305eefdd8bcaca55b437cc4e4b Mon Sep 17 00:00:00 2001
From: Tom White <tom.e.white@gmail.com>
Date: Mon, 28 Oct 2024 11:16:53 +0000
Subject: [PATCH 6/6] Fix problem where lambdas wrapping numba functions get
 recompiled

---
 sgkit/stats/aggregation.py           | 8 ++++----
 sgkit/stats/aggregation_numba_fns.py | 6 ++++++
 sgkit/stats/popgen.py                | 4 +---
 3 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/sgkit/stats/aggregation.py b/sgkit/stats/aggregation.py
index 9360e318c..9f3862985 100644
--- a/sgkit/stats/aggregation.py
+++ b/sgkit/stats/aggregation.py
@@ -680,7 +680,7 @@ def variant_stats(
     --------
     :func:`count_variant_genotypes`
     """
-    from .aggregation_numba_fns import count_hom
+    from .aggregation_numba_fns import count_hom_new_axis
 
     variables.validate(ds, {call_genotype: variables.call_genotype_spec})
     mixed_ploidy = ds[call_genotype].attrs.get("mixed_ploidy", False)
@@ -697,7 +697,7 @@ def variant_stats(
     G = da.asarray(ds[call_genotype].data)
     H = xr.DataArray(
         da.map_blocks(
-            lambda *args: count_hom(*args)[:, np.newaxis, :],
+            count_hom_new_axis,
             G,
             np.zeros(3, np.uint64),
             drop_axis=2,
@@ -796,7 +796,7 @@ def sample_stats(
     ValueError
         If the dataset contains mixed-ploidy genotype calls.
     """
-    from .aggregation_numba_fns import count_hom
+    from .aggregation_numba_fns import count_hom_new_axis
 
     variables.validate(ds, {call_genotype: variables.call_genotype_spec})
     mixed_ploidy = ds[call_genotype].attrs.get("mixed_ploidy", False)
@@ -805,7 +805,7 @@ def sample_stats(
     GT = da.asarray(ds[call_genotype].transpose("samples", "variants", "ploidy").data)
     H = xr.DataArray(
         da.map_blocks(
-            lambda *args: count_hom(*args)[:, np.newaxis, :],
+            count_hom_new_axis,
             GT,
             np.zeros(3, np.uint64),
             drop_axis=2,
diff --git a/sgkit/stats/aggregation_numba_fns.py b/sgkit/stats/aggregation_numba_fns.py
index 3335f5457..b84b92a09 100644
--- a/sgkit/stats/aggregation_numba_fns.py
+++ b/sgkit/stats/aggregation_numba_fns.py
@@ -2,6 +2,8 @@
 # in a separate file here, and imported dynamically to avoid
 # initial compilation overhead.
 
+import numpy as np
+
 from sgkit.accelerate import numba_guvectorize, numba_jit
 from sgkit.typing import ArrayLike
 
@@ -102,3 +104,7 @@ def count_hom(
         index = _classify_hom(genotypes[i])
         if index >= 0:
             out[index] += 1
+
+
+def count_hom_new_axis(genotypes: ArrayLike, _: ArrayLike) -> ArrayLike:
+    return count_hom(genotypes, _)[:, np.newaxis, :]  # insert size-1 axis at dim 1
diff --git a/sgkit/stats/popgen.py b/sgkit/stats/popgen.py
index d000bdbee..e201dfc98 100644
--- a/sgkit/stats/popgen.py
+++ b/sgkit/stats/popgen.py
@@ -595,9 +595,7 @@ def pbs(
     cohorts = cohorts or list(itertools.combinations(range(n_cohorts), 3))  # type: ignore
     ct = _cohorts_to_array(cohorts, ds.indexes.get("cohorts_0", None))
 
-    p = da.map_blocks(
-        lambda t: _pbs_cohorts(t, ct), t, chunks=shape, new_axis=3, dtype=np.float64
-    )
+    p = da.map_blocks(_pbs_cohorts, t, ct, chunks=shape, new_axis=3, dtype=np.float64)
     assert_array_shape(p, n_windows, n_cohorts, n_cohorts, n_cohorts)
 
     new_ds = create_dataset(