File 2821-Optimise-prim_file-in-write-mode.patch of Package erlang

From 78a61324d61677ec91fed93bad66e4d3fa03deb6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Muska=C5=82a?= <micmus@fb.com>
Date: Mon, 12 May 2025 17:14:47 +0100
Subject: [PATCH] Optimise prim_file in write mode

This adds two optimisations to `prim_file`.

For files opened for writing only (`write` without `read`), it skips
allocating the read buffer, which saves a lot of allocations when
writing many files.

For the various write operations, if the file is opened in write-only
mode, we don't need to call `seek` to reset the position before writing:
since we could not have read from the file, there is nothing to
synchronise. This saves one syscall per write.
---
 erts/preloaded/src/prim_file.erl | 102 +++++++++++++++++++------------
 1 file changed, 63 insertions(+), 39 deletions(-)

diff --git a/erts/preloaded/src/prim_file.erl b/erts/preloaded/src/prim_file.erl
index dac78f1743..de6f018605 100644
--- a/erts/preloaded/src/prim_file.erl
+++ b/erts/preloaded/src/prim_file.erl
@@ -134,7 +134,7 @@ open(Name, Modes) ->
     %% the public file interface, which has leaked through for ages because of
     %% "raw files."
     try open_nif(encode_path(Name), Modes) of
-        {ok, Ref} -> {ok, make_fd(Ref, Modes)};
+        {ok, Ref} -> {ok, make_fd(Modes, Ref)};
         {error, Reason} -> {error, Reason}
     catch
         error:badarg -> {error, badarg}
@@ -142,14 +142,14 @@ open(Name, Modes) ->
 
 file_desc_to_ref(FileDescriptorId, Modes) ->
     try file_desc_to_ref_nif(FileDescriptorId) of
-        {ok, Ref} -> {ok, make_fd(Ref, Modes)};
+        {ok, Ref} -> {ok, make_fd(Modes, Ref)};
         {error, Reason} -> {error, Reason}
     catch
         error:badarg -> {error, badarg}
     end.
 
-make_fd(FRef, Modes) ->
-    #file_descriptor{module = ?MODULE, data = build_fd_data(FRef, Modes) }.
+make_fd(Modes, FRef) ->
+    #file_descriptor{module = ?MODULE, data = build_fd_data(Modes, FRef) }.
 
 close(Fd) ->
     try
@@ -163,9 +163,10 @@ read(Fd, Size) ->
     try
         #{ handle := FRef,
            r_ahead_size := RASz,
-           r_buffer := RBuf } = get_fd_data(Fd),
+           r_buffer := RBuf } = get_fd_data_for_read(Fd),
         read_1(FRef, RBuf, prim_buffer:size(RBuf), RASz, Size)
     catch
+        throw:Err -> Err;
         error:badarg -> {error, badarg}
     end.
 
@@ -206,11 +207,12 @@ read_line(Fd) ->
     try
         #{ handle := FRef,
            r_ahead_size := RASz,
-           r_buffer := RBuf } = get_fd_data(Fd),
+           r_buffer := RBuf } = get_fd_data_for_read(Fd),
         SearchResult = prim_buffer:find_byte_index(RBuf, $\n),
         LineSize = max(?MIN_READLINE_SIZE, RASz),
         read_line_1(FRef, RBuf, SearchResult, LineSize)
     catch
+        throw:Err -> Err;
         error:badarg -> {error, badarg}
     end.
 
@@ -304,8 +306,10 @@ datasync(Fd) ->
 position(Fd, {cur, Offset}) ->
     try
         %% Adjust our current position according to how much we've read ahead.
-        #{ r_buffer := RBuf } = get_fd_data(Fd),
-        position_1(Fd, cur, Offset - prim_buffer:size(RBuf))
+        case get_fd_data(Fd) of
+            #{ r_buffer := RBuf } -> position_1(Fd, cur, Offset - prim_buffer:size(RBuf));
+            #{} -> position_1(Fd, cur, Offset)
+        end
     catch
         error:badarg -> {error, badarg}
     end;
@@ -321,9 +325,13 @@ position(Fd, eof) -> position(Fd, {eof, 0});
 position(Fd, Offset) -> position(Fd, {bof, Offset}).
 
 position_1(Fd, Mark, Offset) ->
-    #{ handle := FRef, r_buffer := RBuf } = get_fd_data(Fd),
-    prim_buffer:wipe(RBuf),
-    seek_nif(FRef, Mark, Offset).
+    case get_fd_data(Fd) of
+        #{ handle := FRef, r_buffer := RBuf } ->
+            prim_buffer:wipe(RBuf),
+            seek_nif(FRef, Mark, Offset);
+        #{ handle := FRef } ->
+            seek_nif(FRef, Mark, Offset)
+    end.
 
 pread(Fd, Offset, Size) ->
     try
@@ -361,9 +369,14 @@ pread_list(FRef, [{Offset, Size} | Rest], ResultList) ->
 
 pwrite(Fd, Offset, IOData) ->
     try
-        #{ handle := FRef, r_buffer := RBuf } = get_fd_data(Fd),
-        prim_buffer:wipe(RBuf),
-        pwrite_plain(FRef, Offset, erlang:iolist_to_iovec(IOData))
+        IOVec = erlang:iolist_to_iovec(IOData),
+        case get_fd_data(Fd) of
+            #{ handle := FRef, r_buffer := RBuf } ->
+                prim_buffer:wipe(RBuf),
+                pwrite_plain(FRef, Offset, IOVec);
+            #{ handle := FRef } ->
+                pwrite_plain(FRef, Offset, IOVec)
+        end
     catch
         error:badarg -> {error, badarg}
     end.
@@ -379,9 +392,13 @@ pwrite_plain(FRef, Offset, IOVec) ->
 
 pwrite(Fd, LocBytes) ->
     try
-        #{ handle := FRef, r_buffer := RBuf } = get_fd_data(Fd),
-        prim_buffer:wipe(RBuf),
-        pwrite_list(FRef, LocBytes, 0)
+        case get_fd_data(Fd) of
+            #{ handle := FRef, r_buffer := RBuf } ->
+                prim_buffer:wipe(RBuf),
+                pwrite_list(FRef, LocBytes, 0);
+            #{ handle := FRef } ->
+                pwrite_list(FRef, LocBytes, 0)
+        end
     catch
         error:badarg -> {error, badarg}
     end.
@@ -470,10 +487,13 @@ get_handle(Fd) ->
 %% Resets the write head to the position the user believes we're at, which may
 %% not be the same as the real one when read caching is in effect.
 reset_write_position(Fd) ->
-    #{ r_buffer := RBuf } = Fd#file_descriptor.data,
-    case prim_buffer:size(RBuf) of
-        Size when Size > 0 -> position(Fd, cur);
-        Size when Size =:= 0 -> ok
+    case Fd#file_descriptor.data of
+        #{ r_buffer := RBuf } ->
+            case prim_buffer:size(RBuf) of
+                Size when Size > 0 -> position(Fd, cur);
+                Size when Size =:= 0 -> ok
+            end;
+        _ -> ok
     end.
 
 get_fd_data(#file_descriptor{ data = Data }) ->
@@ -483,24 +503,28 @@ get_fd_data(#file_descriptor{ data = Data }) ->
         _ -> error(not_on_controlling_process)
     end.
 
-build_fd_data(FRef, Modes) ->
-    Defaults =
-        #{ owner => self(),
-           handle => FRef,
-           r_ahead_size => 0,
-           r_buffer => prim_buffer:new() },
-    fill_fd_option_map(Modes, Defaults).
-
-fill_fd_option_map([], Map) ->
-    Map;
-
-fill_fd_option_map([read_ahead | Modes], Map) ->
-    fill_fd_option_map([{read_ahead, 64 bsl 10} | Modes], Map);
-fill_fd_option_map([{read_ahead, Size} | Modes], Map) ->
-    fill_fd_option_map(Modes, Map#{ r_ahead_size => Size });
-
-fill_fd_option_map([_Ignored | Modes], Map) ->
-    fill_fd_option_map(Modes, Map).
+get_fd_data_for_read(Fd) ->
+    case get_fd_data(Fd) of
+        #{ r_buffer := _ } = Data -> Data;
+        _ -> throw({error, ebadf})
+    end.
+
+build_fd_data(Modes, FRef) -> build_fd_data(Modes, FRef, self(), 0, default).
+
+build_fd_data([], FRef, Owner, _RASz, write) ->
+    #{ handle => FRef, owner => Owner };
+build_fd_data([], FRef, Owner, RASz, Mode) when Mode =:= read; Mode =:= default ->
+    #{ handle => FRef, owner => Owner, r_ahead_size => RASz, r_buffer => prim_buffer:new() };
+build_fd_data([read_ahead | Modes], FRef, Owner, _, Mode) ->
+    build_fd_data(Modes, FRef, Owner, 64 bsl 10, Mode);
+build_fd_data([{read_ahead, RASz} | Modes], FRef, Owner, _, Mode) ->
+    build_fd_data(Modes, FRef, Owner, RASz, Mode);
+build_fd_data([write | Modes], FRef, Owner, RASz, default) ->
+    build_fd_data(Modes, FRef, Owner, RASz, write);
+build_fd_data([read | Modes], FRef, Owner, RASz, _Mode) ->
+    build_fd_data(Modes, FRef, Owner, RASz, read);
+build_fd_data([_Ignored | Modes], FRef, Owner, RASz, Mode) ->
+    build_fd_data(Modes, FRef, Owner, RASz, Mode).
 
 open_nif(_Name, _Modes) ->
     erlang:nif_error(undef).
-- 
2.51.0

openSUSE Build Service is sponsored by