File dropna.patch of Package python-pandas

From 1e899afbd9ca20f4ce9d6f93e1f62c072be0ed23 Mon Sep 17 00:00:00 2001
From: Gen Sato <52241300+halogen22@users.noreply.github.com>
Date: Tue, 18 Mar 2025 01:33:40 +0900
Subject: [PATCH] BUG: .mode(dropna=False) doesn't work with nullable integers
 (#61132)

* Fix dropna bug when mode

* Fix test cases

* Fix data type incompatible
---
 doc/source/whatsnew/v3.0.0.rst            |  1 +
 pandas/_libs/hashtable_func_helper.pxi.in |  2 +-
 pandas/core/algorithms.py                 | 12 +++---
 pandas/core/arrays/base.py                |  5 ++-
 pandas/core/arrays/categorical.py         |  2 +-
 pandas/core/arrays/datetimelike.py        |  2 +-
 pandas/core/arrays/masked.py              |  8 +---
 pandas/core/series.py                     |  2 +-
 pandas/tests/series/test_reductions.py    | 23 +++++++++++
 pandas/tests/test_algos.py                | 47 +++++++++++++++--------
 10 files changed, 71 insertions(+), 33 deletions(-)

Index: pandas-2.2.3/pandas/_libs/hashtable_func_helper.pxi.in
===================================================================
--- pandas-2.2.3.orig/pandas/_libs/hashtable_func_helper.pxi.in
+++ pandas-2.2.3/pandas/_libs/hashtable_func_helper.pxi.in
@@ -443,7 +443,7 @@ def mode(ndarray[htfunc_t] values, bint
 
     if na_counter > 0:
         res_mask = np.zeros(j+1, dtype=np.bool_)
-        res_mask[j] = True
+        res_mask[j] = (na_counter == max_count)
     return modes[:j + 1], res_mask
 
 
Index: pandas-2.2.3/pandas/core/algorithms.py
===================================================================
--- pandas-2.2.3.orig/pandas/core/algorithms.py
+++ pandas-2.2.3/pandas/core/algorithms.py
@@ -1022,7 +1022,7 @@ def duplicated(
 
 def mode(
     values: ArrayLike, dropna: bool = True, mask: npt.NDArray[np.bool_] | None = None
-) -> ArrayLike:
+) -> tuple[np.ndarray, npt.NDArray[np.bool_]] | ExtensionArray:
     """
     Returns the mode(s) of an array.
 
@@ -1035,7 +1035,7 @@ def mode(
 
     Returns
     -------
-    np.ndarray or ExtensionArray
+    Union[Tuple[np.ndarray, npt.NDArray[np.bool_]], ExtensionArray]
     """
     values = _ensure_arraylike(values, func_name="mode")
     original = values
@@ -1049,8 +1049,10 @@ def mode(
     values = _ensure_data(values)
 
     npresult, res_mask = htable.mode(values, dropna=dropna, mask=mask)
-    if res_mask is not None:
-        return npresult, res_mask  # type: ignore[return-value]
+    if res_mask is None:
+        res_mask = np.zeros(npresult.shape, dtype=np.bool_)
+    else:
+        return npresult, res_mask
 
     try:
         npresult = np.sort(npresult)
@@ -1061,7 +1063,7 @@ def mode(
         )
 
     result = _reconstruct_data(npresult, original.dtype, original)
-    return result
+    return result, res_mask
 
 
 def rank(
Index: pandas-2.2.3/pandas/core/arrays/base.py
===================================================================
--- pandas-2.2.3.orig/pandas/core/arrays/base.py
+++ pandas-2.2.3/pandas/core/arrays/base.py
@@ -2270,8 +2270,9 @@ class ExtensionArray:
             Sorted, if possible.
         """
         # error: Incompatible return value type (got "Union[ExtensionArray,
-        # ndarray[Any, Any]]", expected "Self")
-        return mode(self, dropna=dropna)  # type: ignore[return-value]
+        # Tuple[np.ndarray, npt.NDArray[np.bool_]]", expected "Self")
+        result, _ = mode(self, dropna=dropna)
+        return result  # type: ignore[return-value]
 
     def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
         if any(
Index: pandas-2.2.3/pandas/core/arrays/categorical.py
===================================================================
--- pandas-2.2.3.orig/pandas/core/arrays/categorical.py
+++ pandas-2.2.3/pandas/core/arrays/categorical.py
@@ -2435,7 +2435,7 @@ class Categorical(NDArrayBackedExtension
         if dropna:
             mask = self.isna()
 
-        res_codes = algorithms.mode(codes, mask=mask)
+        res_codes, _ = algorithms.mode(codes, mask=mask)
         res_codes = cast(np.ndarray, res_codes)
         assert res_codes.dtype == codes.dtype
         res = self._from_backing_data(res_codes)
Index: pandas-2.2.3/pandas/core/arrays/datetimelike.py
===================================================================
--- pandas-2.2.3.orig/pandas/core/arrays/datetimelike.py
+++ pandas-2.2.3/pandas/core/arrays/datetimelike.py
@@ -1646,7 +1646,7 @@ class DatetimeLikeArrayMixin(  # type: i
         if dropna:
             mask = self.isna()
 
-        i8modes = algorithms.mode(self.view("i8"), mask=mask)
+        i8modes, _ = algorithms.mode(self.view("i8"), mask=mask)
         npmodes = i8modes.view(self._ndarray.dtype)
         npmodes = cast(np.ndarray, npmodes)
         return self._from_backing_data(npmodes)
Index: pandas-2.2.3/pandas/core/arrays/masked.py
===================================================================
--- pandas-2.2.3.orig/pandas/core/arrays/masked.py
+++ pandas-2.2.3/pandas/core/arrays/masked.py
@@ -1105,12 +1105,8 @@ class BaseMaskedArray(OpsMixin, Extensio
         return Series(arr, index=index, name="count", copy=False)
 
     def _mode(self, dropna: bool = True) -> Self:
-        if dropna:
-            result = mode(self._data, dropna=dropna, mask=self._mask)
-            res_mask = np.zeros(result.shape, dtype=np.bool_)
-        else:
-            result, res_mask = mode(self._data, dropna=dropna, mask=self._mask)
-        result = type(self)(result, res_mask)  # type: ignore[arg-type]
+        result, res_mask = mode(self._data, dropna=dropna, mask=self._mask)
+        result = type(self)(result, res_mask)
         return result[result.argsort()]
 
     @doc(ExtensionArray.equals)
Index: pandas-2.2.3/pandas/core/series.py
===================================================================
--- pandas-2.2.3.orig/pandas/core/series.py
+++ pandas-2.2.3/pandas/core/series.py
@@ -2328,7 +2328,7 @@ class Series(base.IndexOpsMixin, NDFrame
         # TODO: Add option for bins like value_counts()
         values = self._values
         if isinstance(values, np.ndarray):
-            res_values = algorithms.mode(values, dropna=dropna)
+            res_values, _ = algorithms.mode(values, dropna=dropna)
         else:
             res_values = values._mode(dropna=dropna)
 
Index: pandas-2.2.3/pandas/tests/series/test_reductions.py
===================================================================
--- pandas-2.2.3.orig/pandas/tests/series/test_reductions.py
+++ pandas-2.2.3/pandas/tests/series/test_reductions.py
@@ -51,6 +51,29 @@ def test_mode_nullable_dtype(any_numeric
     tm.assert_series_equal(result, expected)
 
 
+def test_mode_nullable_dtype_edge_case(any_numeric_ea_dtype):
+    # GH##58926
+    ser = Series([1, 2, 3, 1], dtype=any_numeric_ea_dtype)
+    result = ser.mode(dropna=False)
+    expected = Series([1], dtype=any_numeric_ea_dtype)
+    tm.assert_series_equal(result, expected)
+
+    ser2 = Series([1, 1, 2, 3, pd.NA], dtype=any_numeric_ea_dtype)
+    result = ser2.mode(dropna=False)
+    expected = Series([1], dtype=any_numeric_ea_dtype)
+    tm.assert_series_equal(result, expected)
+
+    ser3 = Series([1, pd.NA, pd.NA], dtype=any_numeric_ea_dtype)
+    result = ser3.mode(dropna=False)
+    expected = Series([pd.NA], dtype=any_numeric_ea_dtype)
+    tm.assert_series_equal(result, expected)
+
+    ser4 = Series([1, 1, pd.NA, pd.NA], dtype=any_numeric_ea_dtype)
+    result = ser4.mode(dropna=False)
+    expected = Series([1, pd.NA], dtype=any_numeric_ea_dtype)
+    tm.assert_series_equal(result, expected)
+
+
 def test_mode_infer_string():
     # GH#56183
     pytest.importorskip("pyarrow")
Index: pandas-2.2.3/pandas/tests/test_algos.py
===================================================================
--- pandas-2.2.3.orig/pandas/tests/test_algos.py
+++ pandas-2.2.3/pandas/tests/test_algos.py
@@ -1840,7 +1840,8 @@ class TestRank:
 class TestMode:
     def test_no_mode(self):
         exp = Series([], dtype=np.float64, index=Index([], dtype=int))
-        tm.assert_numpy_array_equal(algos.mode(np.array([])), exp.values)
+        result, _ = algos.mode(np.array([]))
+        tm.assert_numpy_array_equal(result, exp.values)
 
     @pytest.mark.parametrize("dt", np.typecodes["AllInteger"] + np.typecodes["Float"])
     def test_mode_single(self, dt):
@@ -1853,20 +1854,24 @@ class TestMode:
 
         ser = Series(data_single, dtype=dt)
         exp = Series(exp_single, dtype=dt)
-        tm.assert_numpy_array_equal(algos.mode(ser.values), exp.values)
+        result, _ = algos.mode(ser.values)
+        tm.assert_numpy_array_equal(result, exp.values)
         tm.assert_series_equal(ser.mode(), exp)
 
         ser = Series(data_multi, dtype=dt)
         exp = Series(exp_multi, dtype=dt)
-        tm.assert_numpy_array_equal(algos.mode(ser.values), exp.values)
+        result, _ = algos.mode(ser.values)
+        tm.assert_numpy_array_equal(result, exp.values)
         tm.assert_series_equal(ser.mode(), exp)
 
     def test_mode_obj_int(self):
         exp = Series([1], dtype=int)
-        tm.assert_numpy_array_equal(algos.mode(exp.values), exp.values)
+        result, _ = algos.mode(exp.values)
+        tm.assert_numpy_array_equal(result, exp.values)
 
         exp = Series(["a", "b", "c"], dtype=object)
-        tm.assert_numpy_array_equal(algos.mode(exp.values), exp.values)
+        result, _ = algos.mode(exp.values)
+        tm.assert_numpy_array_equal(result, exp.values)
 
     @pytest.mark.parametrize("dt", np.typecodes["AllInteger"] + np.typecodes["Float"])
     def test_number_mode(self, dt):
@@ -1878,12 +1883,14 @@ class TestMode:
 
         ser = Series(data_single, dtype=dt)
         exp = Series(exp_single, dtype=dt)
-        tm.assert_numpy_array_equal(algos.mode(ser.values), exp.values)
+        result, _ = algos.mode(ser.values)
+        tm.assert_numpy_array_equal(result, exp.values)
         tm.assert_series_equal(ser.mode(), exp)
 
         ser = Series(data_multi, dtype=dt)
         exp = Series(exp_multi, dtype=dt)
-        tm.assert_numpy_array_equal(algos.mode(ser.values), exp.values)
+        result, _ = algos.mode(ser.values)
+        tm.assert_numpy_array_equal(result, exp.values)
         tm.assert_series_equal(ser.mode(), exp)
 
     def test_strobj_mode(self):
@@ -1892,7 +1899,8 @@ class TestMode:
 
         ser = Series(data, dtype="c")
         exp = Series(exp, dtype="c")
-        tm.assert_numpy_array_equal(algos.mode(ser.values), exp.values)
+        result, _ = algos.mode(ser.values)
+        tm.assert_numpy_array_equal(result, exp.values)
         tm.assert_series_equal(ser.mode(), exp)
 
     @pytest.mark.parametrize("dt", [str, object])
@@ -1902,7 +1910,8 @@ class TestMode:
 
         ser = Series(data, dtype=dt)
         exp = Series(exp, dtype=dt)
-        tm.assert_numpy_array_equal(algos.mode(ser.values), exp.values)
+        result, _ = algos.mode(ser.values)
+        tm.assert_numpy_array_equal(result, exp.values)
         tm.assert_series_equal(ser.mode(), exp)
 
     def test_datelike_mode(self):
@@ -1936,18 +1945,21 @@ class TestMode:
     def test_mixed_dtype(self):
         exp = Series(["foo"], dtype=object)
         ser = Series([1, "foo", "foo"])
-        tm.assert_numpy_array_equal(algos.mode(ser.values), exp.values)
+        result, _ = algos.mode(ser.values)
+        tm.assert_numpy_array_equal(result, exp.values)
         tm.assert_series_equal(ser.mode(), exp)
 
     def test_uint64_overflow(self):
         exp = Series([2**63], dtype=np.uint64)
         ser = Series([1, 2**63, 2**63], dtype=np.uint64)
-        tm.assert_numpy_array_equal(algos.mode(ser.values), exp.values)
+        result, _ = algos.mode(ser.values)
+        tm.assert_numpy_array_equal(result, exp.values)
         tm.assert_series_equal(ser.mode(), exp)
 
         exp = Series([1, 2**63], dtype=np.uint64)
         ser = Series([1, 2**63], dtype=np.uint64)
-        tm.assert_numpy_array_equal(algos.mode(ser.values), exp.values)
+        result, _ = algos.mode(ser.values)
+        tm.assert_numpy_array_equal(result, exp.values)
         tm.assert_series_equal(ser.mode(), exp)
 
     def test_categorical(self):
@@ -1969,15 +1981,18 @@ class TestMode:
     def test_index(self):
         idx = Index([1, 2, 3])
         exp = Series([1, 2, 3], dtype=np.int64)
-        tm.assert_numpy_array_equal(algos.mode(idx), exp.values)
+        result, _ = algos.mode(idx)
+        tm.assert_numpy_array_equal(result, exp.values)
 
         idx = Index([1, "a", "a"])
         exp = Series(["a"], dtype=object)
-        tm.assert_numpy_array_equal(algos.mode(idx), exp.values)
+        result, _ = algos.mode(idx)
+        tm.assert_numpy_array_equal(result, exp.values)
 
         idx = Index([1, 1, 2, 3, 3])
         exp = Series([1, 3], dtype=np.int64)
-        tm.assert_numpy_array_equal(algos.mode(idx), exp.values)
+        result, _ = algos.mode(idx)
+        tm.assert_numpy_array_equal(result, exp.values)
 
         idx = Index(
             ["1 day", "1 day", "-1 day", "-1 day 2 min", "2 min", "2 min"],
openSUSE Build Service is sponsored by