File 2203-Reduce-memory-use-of-system_information-to_file-1.patch of Package erlang

From d468fcbb1dfeef6c68f6033df9bff57dfcd748af Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?John=20H=C3=B6gberg?= <john@erlang.org>
Date: Wed, 10 Jan 2018 13:34:05 +0100
Subject: [PATCH] Reduce memory use of system_information:to_file/1

The previous implementation generated a term, converted it to plain
text with io_lib:format/2, and then converted that to a binary
before writing it to disk.

We now emit the term as we go, which should make it a bit safer to
extract this information under load.
---
 lib/runtime_tools/src/system_information.erl | 232 ++++++++++++++++++---------
 1 file changed, 158 insertions(+), 74 deletions(-)

diff --git a/lib/runtime_tools/src/system_information.erl b/lib/runtime_tools/src/system_information.erl
index df25297eb9..6a9f3ff58a 100644
--- a/lib/runtime_tools/src/system_information.erl
+++ b/lib/runtime_tools/src/system_information.erl
@@ -75,43 +75,37 @@ load_report(file, File)   -> load_report(data, from_file(File));
 load_report(data, Report) ->
     ok = start_internal(), gen_server:call(?SERVER, {load_report, Report}, infinity).
 
-report() -> [
-	{init_arguments,    init:get_arguments()},
-	{code_paths,        code:get_path()},
-	{code,              code()},
-	{system_info,       erlang_system_info()},
-	{erts_compile_info, erlang:system_info(compile_info)},
-	{beam_dynamic_libraries, get_dynamic_libraries()},
-	{environment_erts,  os_getenv_erts_specific()},
-	{environment,       [split_env(Env) || Env <- os:getenv()]},
-	{sanity_check,      sanity_check()}	     
-    ].
+report() ->
+    %% This is ugly but beats having to maintain two distinct implementations,
+    %% and we don't really care about memory use since it's internal and
+    %% undocumented.
+    {ok, Fd} = file:open([], [ram, read, write]),
+    to_fd(Fd),
+    {ok, _} = file:position(Fd, bof),
+    from_fd(Fd).
 
 -spec to_file(FileName) -> ok | {error, Reason} when
       FileName :: file:name_all(),
       Reason :: file:posix() | badarg | terminated | system_limit.
 
 to_file(File) ->
-    file:write_file(File, iolist_to_binary([
-		io_lib:format("{system_information_version, ~p}.~n", [
-			?REPORT_FILE_VSN
-		    ]),
-		io_lib:format("{system_information, ~p}.~n", [
-			report()
-		    ])
-	    ])).
+    case file:open(File, [raw, write, binary, delayed_write]) of
+        {ok, Fd} ->
+            try
+                to_fd(Fd)
+            after
+                file:close(Fd)
+            end;
+        {error, Reason} ->
+            {error, Reason}
+    end.
 
 from_file(File) ->
-    case file:consult(File) of
-	{ok, Data} ->
-	    case get_value([system_information_version], Data) of
-		?REPORT_FILE_VSN ->
-		    get_value([system_information], Data);
-		Vsn ->
-		    erlang:error({unknown_version, Vsn})
-	    end;
-	_ ->
-	    erlang:error(bad_report_file)
+    {ok, Fd} = file:open(File, [raw, read]),
+    try
+        from_fd(Fd)
+    after
+        file:close(Fd)
     end.
 
 applications() -> applications([]).
@@ -457,61 +451,151 @@ split_env([$=|Vs], Key) -> {lists:reverse(Key), Vs};
 split_env([I|Vs], Key)  -> split_env(Vs, [I|Key]);
 split_env([], KV)       -> lists:reverse(KV). % should not happen.
 
-%% get applications
+from_fd(Fd) ->
+    try
+        [{system_information_version, "1.0"},
+         {system_information, Data}] = consult_fd(Fd),
+        Data
+    catch
+        _:_ -> erlang:error(bad_report_file)
+    end.
 
-code() ->
-    % order is important
-    get_code_from_paths(code:get_path()).
+consult_fd(Fd) ->
+    consult_fd_1(Fd, [], {ok, []}).
+consult_fd_1(Fd, Cont0, ReadResult) ->
+    Data =
+        case ReadResult of
+            {ok, Characters} -> Characters;
+            eof -> eof
+        end,
+    case erl_scan:tokens(Cont0, Data, 1) of
+        {done, {ok, Tokens, _}, Next} ->
+            {ok, Term} = erl_parse:parse_term(Tokens),
+            [Term | consult_fd_1(Fd, [], {ok, Next})];
+        {more, Cont} ->
+            consult_fd_1(Fd, Cont, file:read(Fd, 1 bsl 20));
+        {done, {eof, _}, eof} -> []
+    end.
 
-get_code_from_paths([]) -> [];
-get_code_from_paths([Path|Paths]) ->
-    case is_application_path(Path) of
-	true -> 
-	    [{application, get_application_from_path(Path)}|get_code_from_paths(Paths)];
-	false ->
-	    [{code, [
-			{path,    Path},
-			{modules, get_modules_from_path(Path)}
-		    ]}|get_code_from_paths(Paths)]
+%%
+%% Dumps a system_information tuple to the given Fd, writing the term in chunks
+%% to avoid eating too much memory on large systems.
+%%
+
+to_fd(Fd) ->
+    EmitChunk =
+        fun(Format, Args) ->
+            ok = file:write(Fd, io_lib:format(Format, Args))
+        end,
+
+    EmitChunk("{system_information_version, ~p}.~n"
+              "{system_information,["
+                  "{init_arguments,~p},"
+                  "{code_paths,~p},",
+        [?REPORT_FILE_VSN,
+         init:get_arguments(),
+         code:get_path()]),
+
+    emit_code_info(EmitChunk),
+
+    EmitChunk(    ","  %% Note the leading comma!
+                  "{system_info,~p},"
+                  "{erts_compile_info,~p},"
+                  "{beam_dynamic_libraries,~p},"
+                  "{environment_erts,~p},"
+                  "{environment,~p},"
+                  "{sanity_check,~p}"
+              "]}.~n",
+        [erlang_system_info(),
+         erlang:system_info(compile_info),
+         get_dynamic_libraries(),
+         os_getenv_erts_specific(),
+         [split_env(Env) || Env <- os:getenv()],
+         sanity_check()]).
+
+%% Emits all modules/applications in the *code path order*
+emit_code_info(EmitChunk) ->
+    EmitChunk("{code, [", []),
+    comma_separated_foreach(EmitChunk,
+        fun(Path) ->
+            case is_application_path(Path) of
+                true -> emit_application_info(EmitChunk, Path);
+                false -> emit_code_path_info(EmitChunk, Path)
+            end
+        end, code:get_path()),
+    EmitChunk("]}", []).
+
+emit_application_info(EmitChunk, Path) ->
+    [Appfile|_] = filelib:wildcard(filename:join(Path, "*.app")),
+    case file:consult(Appfile) of
+        {ok, [{application, App, Info}]} ->
+            RtDeps = proplists:get_value(runtime_dependencies, Info, []),
+            Description = proplists:get_value(description, Info, []),
+            Version = proplists:get_value(vsn, Info, []),
+
+            EmitChunk("{application, {~p,["
+                          "{description,~p},"
+                          "{vsn,~p},"
+                          "{path,~p},"
+                          "{runtime_dependencies,~p},",
+                [App, Description, Version, Path, RtDeps]),
+            emit_module_info_from_path(EmitChunk, Path),
+            EmitChunk("]}}", [])
     end.
 
+emit_code_path_info(EmitChunk, Path) ->
+    EmitChunk("{code, ["
+                  "{path, ~p},", [Path]),
+    emit_module_info_from_path(EmitChunk, Path),
+    EmitChunk("]}", []).
+
+emit_module_info_from_path(EmitChunk, Path) ->
+    BeamFiles = filelib:wildcard(filename:join(Path, "*.beam")),
+
+    EmitChunk("{modules, [", []),
+    comma_separated_foreach(EmitChunk,
+        fun(Beam) ->
+            emit_module_info(EmitChunk, Beam)
+        end, BeamFiles),
+    EmitChunk("]}", []).
+
+emit_module_info(EmitChunk, Beam) ->
+    %% FIXME: The next three calls load *all* significant chunks onto the heap,
+    %% which may cause us to run out of memory if there's a huge module in the
+    %% code path.
+    {ok,{Mod, Md5}} = beam_lib:md5(Beam),
+
+    CompilerVersion = get_compiler_version(Beam),
+    Native = beam_is_native_compiled(Beam),
+
+    Loaded = case code:is_loaded(Mod) of
+        false -> false;
+        _     -> true
+    end,
+
+    EmitChunk("{~p,["
+                  "{loaded,~p},"
+                  "{native,~p},"
+                  "{compiler,~p},"
+                  "{md5,~p}"
+              "]}",
+        [Mod, Loaded, Native, CompilerVersion, hexstring(Md5)]).
+
+comma_separated_foreach(_EmitChunk, _Fun, []) ->
+    ok;
+comma_separated_foreach(_EmitChunk, Fun, [H]) ->
+    Fun(H);
+comma_separated_foreach(EmitChunk, Fun, [H | T]) ->
+    Fun(H),
+    EmitChunk(",", []),
+    comma_separated_foreach(EmitChunk, Fun, T).
+
 is_application_path(Path) ->
     case filelib:wildcard(filename:join(Path, "*.app")) of
 	[] -> false;
 	_  -> true
     end.
 
-get_application_from_path(Path) ->
-    [Appfile|_] = filelib:wildcard(filename:join(Path, "*.app")),
-    case file:consult(Appfile) of
-	{ok, [{application, App, Info}]} ->
-	    {App, [
-		    {description, proplists:get_value(description, Info, [])},
-		    {vsn,         proplists:get_value(vsn, Info, [])},
-		    {path,        Path},
-		    {runtime_dependencies,
-		     proplists:get_value(runtime_dependencies, Info, [])},
-		    {modules,     get_modules_from_path(Path)}
-		]}
-    end.
-
-get_modules_from_path(Path) ->
-    [ 
-	begin
-		{ok,{Mod, Md5}} = beam_lib:md5(Beam),
-		Loaded = case code:is_loaded(Mod) of
-		    false -> false;
-		    _     -> true
-		end,
-		{Mod, [
-			{loaded,   Loaded},
-			{native,   beam_is_native_compiled(Beam)},
-			{compiler, get_compiler_version(Beam)},
-			{md5,      hexstring(Md5)}
-		    ]}
-	end || Beam <- filelib:wildcard(filename:join(Path, "*.beam"))
-    ].
-
 hexstring(Bin) when is_binary(Bin) ->
     lists:flatten([io_lib:format("~2.16.0b", [V]) || <<V>> <= Bin]).
 
-- 
2.15.1

openSUSE Build Service is sponsored by