File np-frombuffer.patch of Package python-numba

From 1b6b5118a2a842fe68b22898d78dcab4a2883830 Mon Sep 17 00:00:00 2001
From: Hanif Ariffin <hanif.ariffin.4326@gmail.com>
Date: Sun, 16 Feb 2025 19:30:47 +0800
Subject: [PATCH] Implemented np.frombuffer kwargs offset and count

---
 docs/upcoming_changes/9926.improvement.rst |  4 ++
 numba/np/arrayobj.py                       | 65 ++++++++++++++++++----
 numba/tests/test_array_methods.py          | 61 ++++++++++++++++++--
 3 files changed, 114 insertions(+), 16 deletions(-)
 create mode 100644 docs/upcoming_changes/9926.improvement.rst

diff --git a/docs/upcoming_changes/9926.improvement.rst b/docs/upcoming_changes/9926.improvement.rst
new file mode 100644
index 00000000000..0fbf36f8164
--- /dev/null
+++ b/docs/upcoming_changes/9926.improvement.rst
@@ -0,0 +1,4 @@
+Implemented support for ``np.frombuffer`` kwargs ``offset`` and ``count``
+-------------------------------------------------------------------------
+
+``np.frombuffer`` now support ``offset`` and ``count`` kwargs.
\ No newline at end of file
diff --git a/numba/np/arrayobj.py b/numba/np/arrayobj.py
index 740355599e6..66a1e352451 100644
--- a/numba/np/arrayobj.py
+++ b/numba/np/arrayobj.py
@@ -5265,12 +5265,14 @@ def impl(arr):
 
 
 @intrinsic
-def np_frombuffer(typingctx, buffer, dtype, retty):
+def np_frombuffer(typingctx, buffer, dtype, count, offset, retty):
     ty = retty.instance_type
-    sig = ty(buffer, dtype, retty)
+    sig = ty(buffer, dtype, count, offset, retty)
 
     def codegen(context, builder, sig, args):
         bufty = sig.args[0]
+        arg_count = args[2]
+        arg_offset = args[3]
         aryty = sig.return_type
 
         buf = make_array(bufty)(context, builder, value=args[0])
@@ -5281,6 +5283,24 @@ def codegen(context, builder, sig, args):
         itemsize = get_itemsize(context, aryty)
         ll_itemsize = Constant(buf.itemsize.type, itemsize)
         nbytes = builder.mul(buf.nitems, buf.itemsize)
+        ll_offset_size = builder.mul(arg_offset, ll_itemsize)
+        nbytes = builder.sub(nbytes, ll_offset_size)
+
+        nbytes_is_negative = builder.icmp_signed(
+            '<',
+            nbytes,
+            ir.Constant(arg_count.type, 0),
+        )
+
+        with builder.if_then(nbytes_is_negative, likely=False):
+            msg = "offset must be non-negative and no greater than buffer length"
+            context.call_conv.return_user_exc(builder, ValueError, (msg,))
+
+        ll_count_is_negative = builder.icmp_signed(
+            '<',
+            arg_count,
+            ir.Constant(arg_count.type, 0),
+        )
 
         # Check that the buffer size is compatible
         rem = builder.srem(nbytes, ll_itemsize)
@@ -5289,10 +5309,33 @@ def codegen(context, builder, sig, args):
             msg = "buffer size must be a multiple of element size"
             context.call_conv.return_user_exc(builder, ValueError, (msg,))
 
-        shape = cgutils.pack_array(builder, [builder.sdiv(nbytes, ll_itemsize)])
+        # Compute number of elements based on count
+        with builder.if_else(ll_count_is_negative) as (then_block, else_block):
+            with then_block:
+                bb_if = builder.basic_block
+                num_whole = builder.sdiv(nbytes, ll_itemsize)
+            with else_block:
+                bb_else = builder.basic_block
+
+        ll_itemcount = builder.phi(arg_count.type)
+        ll_itemcount.add_incoming(num_whole, bb_if)
+        ll_itemcount.add_incoming(arg_count, bb_else)
+
+        # Ensure we don’t exceed the buffer size
+        ll_required_size = builder.mul(ll_itemcount, ll_itemsize)
+        is_too_large = builder.icmp_unsigned('>', ll_required_size, nbytes)
+
+        with builder.if_then(is_too_large, likely=False):
+            msg = "buffer is smaller than requested size"
+            context.call_conv.return_user_exc(builder, ValueError, (msg,))
+
+        # Set shape and strides
+        shape = cgutils.pack_array(builder, [ll_itemcount])
         strides = cgutils.pack_array(builder, [ll_itemsize])
+
+        data = builder.gep(buf.data, [arg_offset])
         data = builder.bitcast(
-            buf.data, context.get_value_type(out_datamodel.get_type('data'))
+            data, context.get_value_type(out_datamodel.get_type('data'))
         )
 
         populate_array(out_ary,
@@ -5301,15 +5344,16 @@ def codegen(context, builder, sig, args):
                        strides=strides,
                        itemsize=ll_itemsize,
                        meminfo=buf.meminfo,
-                       parent=buf.parent,)
+                       parent=buf.parent)
 
         res = out_ary._getvalue()
         return impl_ret_borrowed(context, builder, sig.return_type, res)
+
     return sig, codegen
 
 
 @overload(np.frombuffer)
-def impl_np_frombuffer(buffer, dtype=float):
+def impl_np_frombuffer(buffer, dtype=float, count=-1, offset=0):
     _check_const_str_dtype("frombuffer", dtype)
 
     if not isinstance(buffer, types.Buffer) or buffer.layout != 'C':
@@ -5317,8 +5361,8 @@ def impl_np_frombuffer(buffer, dtype=float):
         raise errors.TypingError(msg)
 
     if (dtype is float or
-        (isinstance(dtype, types.Function) and dtype.typing_key is float) or
-            is_nonelike(dtype)): #default
+            (isinstance(dtype, types.Function) and dtype.typing_key is float) or
+            is_nonelike(dtype)):  # default
         nb_dtype = types.double
     else:
         nb_dtype = ty_parse_dtype(dtype)
@@ -5331,8 +5375,9 @@ def impl_np_frombuffer(buffer, dtype=float):
                f"np.frombuffer({buffer}, {dtype})")
         raise errors.TypingError(msg)
 
-    def impl(buffer, dtype=float):
-        return np_frombuffer(buffer, dtype, retty)
+    def impl(buffer, dtype=float, count=-1, offset=0):
+        return np_frombuffer(buffer, dtype, count, offset, retty)
+
     return impl
 
 
diff --git a/numba/tests/test_array_methods.py b/numba/tests/test_array_methods.py
index fa2608b2163..49f37a20b7f 100644
--- a/numba/tests/test_array_methods.py
+++ b/numba/tests/test_array_methods.py
@@ -146,17 +146,17 @@ def array_astype(arr):
     return array_astype
 
 
-def np_frombuffer(b):
+def np_frombuffer(b, dtype: np.dtype = np.float64, count=-1, offset=0):
     """
     np.frombuffer() on a Python-allocated buffer.
     """
-    return np.frombuffer(b)
+    return np.frombuffer(b, dtype=dtype, count=count, offset=offset)
 
-def np_frombuffer_dtype(b):
-    return np.frombuffer(b, dtype=np.complex64)
+def np_frombuffer_dtype(b, count=-1, offset=0):
+    return np.frombuffer(b, dtype=np.complex64, count=count, offset=offset)
 
-def np_frombuffer_dtype_str(b):
-    return np.frombuffer(b, dtype='complex64')
+def np_frombuffer_dtype_str(b, count=-1, offset=0):
+    return np.frombuffer(b, dtype='complex64', count=count, offset=offset)
 
 def np_frombuffer_allocated(shape):
     """
@@ -1806,6 +1806,55 @@ def test_array_ctor_with_dtype_arg(self):
         args = n, np.dtype('f4')
         np.testing.assert_array_equal(pyfunc(*args), cfunc(*args))
 
+    def test_frombuffer_offset(self):
+        # Expect to skip the first two elements (offset = 2 bytes)
+        buffer = np.arange(8, dtype=np.uint8)
+        offset = 2
+        result = np_frombuffer(buffer, dtype=buffer.dtype, offset=offset)
+        expected = np.array([2, 3, 4, 5, 6, 7], dtype=buffer.dtype)
+        np.testing.assert_array_equal(result, expected)
+
+    def test_frombuffer_count(self):
+        # Expect to read only 4 elements
+        buffer = np.arange(24, dtype=np.uint8)
+        count = 4
+        result = np_frombuffer(buffer, dtype=buffer.dtype, count=count)
+        expected = np.array([0, 1, 2, 3], dtype=buffer.dtype)
+        np.testing.assert_array_equal(result, expected)
+
+    def test_frombuffer_count_negative_means_all(self):
+        # Expect to read only 4 elements
+        buffer = np.arange(8, dtype=np.uint8)
+        result = np_frombuffer(buffer, dtype=buffer.dtype, count=-1)
+        expected = np.array([0, 1, 2, 3, 4, 5, 6, 7], dtype=buffer.dtype)
+        np.testing.assert_array_equal(result, expected)
+
+    def test_frombuffer_offset_and_count(self):
+        # Skip first 2 bytes and read 3 elements
+        buffer = np.arange(24, dtype=np.uint8)
+        offset = 2
+        count = 3
+        result = np_frombuffer(buffer, dtype=buffer.dtype, offset=offset, count=count)
+        expected = np.array([2, 3, 4], dtype=buffer.dtype)
+        np.testing.assert_array_equal(result, expected)
+
+    def test_frombuffer_invalid_offset(self):
+        # Test behavior when offset exceeds buffer size
+        buffer = np.arange(24, dtype=np.uint8)
+        offset = len(buffer) + 1  # Invalid offset
+        msg = "offset must be non-negative and no greater than buffer length"
+        with self.assertRaisesRegex(ValueError, msg):
+            np_frombuffer(buffer, dtype=buffer.dtype, offset=offset)
+
+    def test_frombuffer_invalid_count(self):
+        # Test behavior when count exceeds the possible number of elements
+        buffer = np.arange(24, dtype=np.uint8)
+        count = len(buffer) + 1  # Count exceeds buffer size
+        msg = "buffer is smaller than requested size"
+        with self.assertRaisesRegex(ValueError, msg):
+            np.frombuffer(buffer, dtype=buffer.dtype, count=count)
+
+
 class TestArrayComparisons(TestCase):
 
     def test_identity(self):
Places

File np-frombuffer.patch of Package python-numba

Places