File np-frombuffer.patch of Package python-numba
From 1b6b5118a2a842fe68b22898d78dcab4a2883830 Mon Sep 17 00:00:00 2001
From: Hanif Ariffin <hanif.ariffin.4326@gmail.com>
Date: Sun, 16 Feb 2025 19:30:47 +0800
Subject: [PATCH] Implemented np.frombuffer kwargs offset and count
---
docs/upcoming_changes/9926.improvement.rst | 4 ++
numba/np/arrayobj.py | 65 ++++++++++++++++++----
numba/tests/test_array_methods.py | 61 ++++++++++++++++++--
3 files changed, 114 insertions(+), 16 deletions(-)
create mode 100644 docs/upcoming_changes/9926.improvement.rst
diff --git a/docs/upcoming_changes/9926.improvement.rst b/docs/upcoming_changes/9926.improvement.rst
new file mode 100644
index 00000000000..0fbf36f8164
--- /dev/null
+++ b/docs/upcoming_changes/9926.improvement.rst
@@ -0,0 +1,4 @@
+Implemented support for ``np.frombuffer`` kwargs ``offset`` and ``count``
+-------------------------------------------------------------------------
+
+``np.frombuffer`` now supports the ``offset`` and ``count`` keyword arguments.
\ No newline at end of file
diff --git a/numba/np/arrayobj.py b/numba/np/arrayobj.py
index 740355599e6..66a1e352451 100644
--- a/numba/np/arrayobj.py
+++ b/numba/np/arrayobj.py
@@ -5265,12 +5265,14 @@ def impl(arr):
@intrinsic
-def np_frombuffer(typingctx, buffer, dtype, retty):
+def np_frombuffer(typingctx, buffer, dtype, count, offset, retty):
ty = retty.instance_type
- sig = ty(buffer, dtype, retty)
+ sig = ty(buffer, dtype, count, offset, retty)
def codegen(context, builder, sig, args):
bufty = sig.args[0]
+ arg_count = args[2]
+ arg_offset = args[3]
aryty = sig.return_type
buf = make_array(bufty)(context, builder, value=args[0])
@@ -5281,6 +5283,24 @@ def codegen(context, builder, sig, args):
itemsize = get_itemsize(context, aryty)
ll_itemsize = Constant(buf.itemsize.type, itemsize)
nbytes = builder.mul(buf.nitems, buf.itemsize)
+ ll_offset_size = builder.mul(arg_offset, ll_itemsize)
+ nbytes = builder.sub(nbytes, ll_offset_size)
+
+ nbytes_is_negative = builder.icmp_signed(
+ '<',
+ nbytes,
+ ir.Constant(arg_count.type, 0),
+ )
+
+ with builder.if_then(nbytes_is_negative, likely=False):
+ msg = "offset must be non-negative and no greater than buffer length"
+ context.call_conv.return_user_exc(builder, ValueError, (msg,))
+
+ ll_count_is_negative = builder.icmp_signed(
+ '<',
+ arg_count,
+ ir.Constant(arg_count.type, 0),
+ )
# Check that the buffer size is compatible
rem = builder.srem(nbytes, ll_itemsize)
@@ -5289,10 +5309,33 @@ def codegen(context, builder, sig, args):
msg = "buffer size must be a multiple of element size"
context.call_conv.return_user_exc(builder, ValueError, (msg,))
- shape = cgutils.pack_array(builder, [builder.sdiv(nbytes, ll_itemsize)])
+ # Compute number of elements based on count
+ with builder.if_else(ll_count_is_negative) as (then_block, else_block):
+ with then_block:
+ bb_if = builder.basic_block
+ num_whole = builder.sdiv(nbytes, ll_itemsize)
+ with else_block:
+ bb_else = builder.basic_block
+
+ ll_itemcount = builder.phi(arg_count.type)
+ ll_itemcount.add_incoming(num_whole, bb_if)
+ ll_itemcount.add_incoming(arg_count, bb_else)
+
+ # Ensure we don't exceed the buffer size
+ ll_required_size = builder.mul(ll_itemcount, ll_itemsize)
+ is_too_large = builder.icmp_unsigned('>', ll_required_size, nbytes)
+
+ with builder.if_then(is_too_large, likely=False):
+ msg = "buffer is smaller than requested size"
+ context.call_conv.return_user_exc(builder, ValueError, (msg,))
+
+ # Set shape and strides
+ shape = cgutils.pack_array(builder, [ll_itemcount])
strides = cgutils.pack_array(builder, [ll_itemsize])
+
+ data = builder.gep(buf.data, [arg_offset])
data = builder.bitcast(
- buf.data, context.get_value_type(out_datamodel.get_type('data'))
+ data, context.get_value_type(out_datamodel.get_type('data'))
)
populate_array(out_ary,
@@ -5301,15 +5344,16 @@ def codegen(context, builder, sig, args):
strides=strides,
itemsize=ll_itemsize,
meminfo=buf.meminfo,
- parent=buf.parent,)
+ parent=buf.parent)
res = out_ary._getvalue()
return impl_ret_borrowed(context, builder, sig.return_type, res)
+
return sig, codegen
@overload(np.frombuffer)
-def impl_np_frombuffer(buffer, dtype=float):
+def impl_np_frombuffer(buffer, dtype=float, count=-1, offset=0):
_check_const_str_dtype("frombuffer", dtype)
if not isinstance(buffer, types.Buffer) or buffer.layout != 'C':
@@ -5317,8 +5361,8 @@ def impl_np_frombuffer(buffer, dtype=float):
raise errors.TypingError(msg)
if (dtype is float or
- (isinstance(dtype, types.Function) and dtype.typing_key is float) or
- is_nonelike(dtype)): #default
+ (isinstance(dtype, types.Function) and dtype.typing_key is float) or
+ is_nonelike(dtype)): # default
nb_dtype = types.double
else:
nb_dtype = ty_parse_dtype(dtype)
@@ -5331,8 +5375,9 @@ def impl_np_frombuffer(buffer, dtype=float):
f"np.frombuffer({buffer}, {dtype})")
raise errors.TypingError(msg)
- def impl(buffer, dtype=float):
- return np_frombuffer(buffer, dtype, retty)
+ def impl(buffer, dtype=float, count=-1, offset=0):
+ return np_frombuffer(buffer, dtype, count, offset, retty)
+
return impl
diff --git a/numba/tests/test_array_methods.py b/numba/tests/test_array_methods.py
index fa2608b2163..49f37a20b7f 100644
--- a/numba/tests/test_array_methods.py
+++ b/numba/tests/test_array_methods.py
@@ -146,17 +146,17 @@ def array_astype(arr):
return array_astype
-def np_frombuffer(b):
+def np_frombuffer(b, dtype: np.dtype = np.float64, count=-1, offset=0):
"""
np.frombuffer() on a Python-allocated buffer.
"""
- return np.frombuffer(b)
+ return np.frombuffer(b, dtype=dtype, count=count, offset=offset)
-def np_frombuffer_dtype(b):
- return np.frombuffer(b, dtype=np.complex64)
+def np_frombuffer_dtype(b, count=-1, offset=0):
+ return np.frombuffer(b, dtype=np.complex64, count=count, offset=offset)
-def np_frombuffer_dtype_str(b):
- return np.frombuffer(b, dtype='complex64')
+def np_frombuffer_dtype_str(b, count=-1, offset=0):
+ return np.frombuffer(b, dtype='complex64', count=count, offset=offset)
def np_frombuffer_allocated(shape):
"""
@@ -1806,6 +1806,55 @@ def test_array_ctor_with_dtype_arg(self):
args = n, np.dtype('f4')
np.testing.assert_array_equal(pyfunc(*args), cfunc(*args))
+ def test_frombuffer_offset(self):
+ # Expect to skip the first two elements (offset = 2 bytes)
+ buffer = np.arange(8, dtype=np.uint8)
+ offset = 2
+ result = np_frombuffer(buffer, dtype=buffer.dtype, offset=offset)
+ expected = np.array([2, 3, 4, 5, 6, 7], dtype=buffer.dtype)
+ np.testing.assert_array_equal(result, expected)
+
+ def test_frombuffer_count(self):
+ # Expect to read only 4 elements
+ buffer = np.arange(24, dtype=np.uint8)
+ count = 4
+ result = np_frombuffer(buffer, dtype=buffer.dtype, count=count)
+ expected = np.array([0, 1, 2, 3], dtype=buffer.dtype)
+ np.testing.assert_array_equal(result, expected)
+
+ def test_frombuffer_count_negative_means_all(self):
+ # A negative count means read every element in the buffer
+ buffer = np.arange(8, dtype=np.uint8)
+ result = np_frombuffer(buffer, dtype=buffer.dtype, count=-1)
+ expected = np.array([0, 1, 2, 3, 4, 5, 6, 7], dtype=buffer.dtype)
+ np.testing.assert_array_equal(result, expected)
+
+ def test_frombuffer_offset_and_count(self):
+ # Skip first 2 bytes and read 3 elements
+ buffer = np.arange(24, dtype=np.uint8)
+ offset = 2
+ count = 3
+ result = np_frombuffer(buffer, dtype=buffer.dtype, offset=offset, count=count)
+ expected = np.array([2, 3, 4], dtype=buffer.dtype)
+ np.testing.assert_array_equal(result, expected)
+
+ def test_frombuffer_invalid_offset(self):
+ # Test behavior when offset exceeds buffer size
+ buffer = np.arange(24, dtype=np.uint8)
+ offset = len(buffer) + 1 # Invalid offset
+ msg = "offset must be non-negative and no greater than buffer length"
+ with self.assertRaisesRegex(ValueError, msg):
+ np_frombuffer(buffer, dtype=buffer.dtype, offset=offset)
+
+ def test_frombuffer_invalid_count(self):
+ # Test behavior when count exceeds the possible number of elements
+ buffer = np.arange(24, dtype=np.uint8)
+ count = len(buffer) + 1 # Count exceeds buffer size
+ msg = "buffer is smaller than requested size"
+ with self.assertRaisesRegex(ValueError, msg):
+ np.frombuffer(buffer, dtype=buffer.dtype, count=count)
+
+
class TestArrayComparisons(TestCase):
def test_identity(self):