A new user interface for you! Read more...

File 0445-Eliminate-crash-in-crashdump_viewer-reading-some-lit.patch of Package erlang

From e3a31aa4b06659762d94e487a4a3d2918fe91096 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= <bjorn@erlang.org>
Date: Fri, 19 Oct 2018 12:40:30 +0200
Subject: [PATCH] Eliminate crash in crashdump_viewer reading some literal maps

Literal maps with complex keys such as:

    #{"one"=>1,"two"=>2,"three"=>3,"four"=>4}.

would produce a crash dump that `crashdump_viewer` was unable
to read.

https://bugs.erlang.org/browse/ERL-722
---
 lib/observer/src/crashdump_viewer.erl  | 155 ++++++++++++++++++++++++---------
 lib/observer/test/crashdump_helper.erl |  21 ++++-
 2 files changed, 134 insertions(+), 42 deletions(-)

diff --git a/lib/observer/src/crashdump_viewer.erl b/lib/observer/src/crashdump_viewer.erl
index 14b086ff58..276b209ce5 100644
--- a/lib/observer/src/crashdump_viewer.erl
+++ b/lib/observer/src/crashdump_viewer.erl
@@ -1594,31 +1594,92 @@ read_heap(Fd,Pid,DecodeOpts,Dict0) ->
 	    Dict0
     end.
 
-read_heap(DecodeOpts,Dict0) ->
-    %% This function is never called if the dump is truncated in {?proc_heap,Pid}
-    case get(fd) of
-	end_of_heap ->
+read_heap(DecodeOpts, Dict0) ->
+    %% This function is never called if the dump is truncated in
+    %% {?proc_heap,Pid}.
+    %%
+    %% It is not always possible to reconstruct the heap terms
+    %% in a single pass, especially if maps are involved.
+    %% See crashdump_helper:literal_map/0 for an example.
+    %%
+    %% Therefore, we need two passes. In the first pass
+    %% we collect all lines without parsing them, and in the
+    %% second pass we parse them.
+    %%
+    %% The first pass follows.
+
+    Lines0 = read_heap_lines(),
+
+    %% Save a map of all unprocessed lines so that deref_ptr() can
+    %% access any line when there are references to terms not yet
+    %% built.
+
+    LineMap = maps:from_list(Lines0),
+    put(line_map, LineMap),
+
+    %% Refc binaries (tag "Yc") must be processed before any sub
+    %% binaries (tag "Ys") referencing them, so we make sure to
+    %% process all the refc binaries first.
+    %%
+    %% The other lines can be processed in any order, but processing
+    %% them in the reverse order compared to how they are printed in
+    %% the crash dump seems to minimize the number of references to
+    %% terms that have not yet been built. That happens to be the
+    %% order of the line list as returned by read_heap_lines/0.
+
+    RefcBins = [Refc || {_,<<"Yc",_/binary>>}=Refc <- Lines0],
+    Lines = RefcBins ++ Lines0,
+
+    %% Second pass.
+
+    init_progress("Processing terms", map_size(LineMap)),
+    Dict = parse_heap_terms(Lines, DecodeOpts, Dict0),
+    erase(line_map),
+    end_progress(),
+    Dict.
+
+read_heap_lines() ->
+    read_heap_lines_1(get(fd), []).
+
+read_heap_lines_1(Fd, Acc) ->
+    case bytes(Fd) of
+        "=" ++ _next_tag ->
             end_progress(),
-	    Dict0;
-	Fd ->
-	    case bytes(Fd) of
-		"=" ++ _next_tag ->
-                    end_progress(),
-		    put(fd, end_of_heap),
-		    Dict0;
-		Line ->
-                    update_progress(length(Line)+1),
-		    Dict = parse(Line,DecodeOpts,Dict0),
-		    read_heap(DecodeOpts,Dict)
-	    end
+            put(fd, end_of_heap),
+            Acc;
+        Line0 ->
+            update_progress(length(Line0)+1),
+            {Addr,":"++Line1} = get_hex(Line0),
+
+            %% Reduce the memory consumption by converting the
+            %% line to a binary. Measurements show that it may also
+            %% be benefical for performance, too, because it makes the
+            %% garbage collections cheaper.
+
+            Line = list_to_binary(Line1),
+            read_heap_lines_1(Fd, [{Addr,Line}|Acc])
     end.
 
-parse(Line0, DecodeOpts, Dict0) ->
-    {Addr,":"++Line1} = get_hex(Line0),
-    {_Term,Line,Dict} = parse_heap_term(Line1, Addr, DecodeOpts, Dict0),
-    [] = skip_blanks(Line),
+parse_heap_terms([{Addr,Line0}|T], DecodeOpts, Dict0) ->
+    case gb_trees:is_defined(Addr, Dict0) of
+        true ->
+            %% Already parsed (by a recursive call from do_deref_ptr()
+            %% to parse_line()). Nothing to do.
+            parse_heap_terms(T, DecodeOpts, Dict0);
+        false ->
+            %% Parse this previously unparsed term.
+            Dict = parse_line(Addr, Line0, DecodeOpts, Dict0),
+            parse_heap_terms(T, DecodeOpts, Dict)
+    end;
+parse_heap_terms([], _DecodeOpts, Dict) ->
     Dict.
 
+parse_line(Addr, Line0, DecodeOpts, Dict0) ->
+    update_progress(1),
+    Line1 = binary_to_list(Line0),
+    {_Term,Line,Dict} = parse_heap_term(Line1, Addr, DecodeOpts, Dict0),
+    [] = skip_blanks(Line),                     %Assertion.
+    Dict.
 
 %%-----------------------------------------------------------------
 %% Page with one port
@@ -2871,16 +2932,18 @@ parse_atom_translation_table(N, Line0, As) ->
 
 
 deref_ptr(Ptr, Line, DecodeOpts, D) ->
-    Lookup = fun(D0) ->
-                     gb_trees:lookup(Ptr, D0)
-             end,
+    Lookup0 = fun(D0) ->
+                      gb_trees:lookup(Ptr, D0)
+              end,
+    Lookup = wrap_line_map(Ptr, Lookup0),
     do_deref_ptr(Lookup, Line, DecodeOpts, D).
 
 deref_bin(Binp0, Offset, Sz, Line, DecodeOpts, D) ->
     Binp = Binp0 bor DecodeOpts#dec_opts.bin_addr_adj,
-    Lookup = fun(D0) ->
-                     lookup_binary(Binp, Offset, Sz, D0)
-             end,
+    Lookup0 = fun(D0) ->
+                      lookup_binary(Binp, Offset, Sz, D0)
+              end,
+    Lookup = wrap_line_map(Binp, Lookup0),
     do_deref_ptr(Lookup, Line, DecodeOpts, D).
 
 lookup_binary(Binp, Offset, Sz, D) ->
@@ -2899,26 +2962,36 @@ lookup_binary(Binp, Offset, Sz, D) ->
             end
     end.
 
+wrap_line_map(Ptr, Lookup) ->
+    wrap_line_map_1(get(line_map), Ptr, Lookup).
+
+wrap_line_map_1(#{}=LineMap, Ptr, Lookup) ->
+    fun(D) ->
+            case Lookup(D) of
+                {value,_}=Res ->
+                    Res;
+                none ->
+                    case LineMap of
+                        #{Ptr:=Line} ->
+                            {line,Ptr,Line};
+                        #{} ->
+                            none
+                    end
+            end
+    end;
+wrap_line_map_1(undefined, _Ptr, Lookup) ->
+    Lookup.
+
 do_deref_ptr(Lookup, Line, DecodeOpts, D0) ->
     case Lookup(D0) of
 	{value,Term} ->
 	    {Term,Line,D0};
 	none ->
-	    case get(fd) of
-		end_of_heap ->
-                    put(incomplete_heap,true),
-		    {['#CDVIncompleteHeap'],Line,D0};
-		Fd ->
-		    case bytes(Fd) of
-			"="++_ ->
-			    put(fd, end_of_heap),
-			    do_deref_ptr(Lookup, Line, DecodeOpts, D0);
-			L ->
-                            update_progress(length(L)+1),
-			    D = parse(L, DecodeOpts, D0),
-			    do_deref_ptr(Lookup, Line, DecodeOpts, D)
-		    end
-	    end
+            put(incomplete_heap, true),
+            {['#CDVIncompleteHeap'],Line,D0};
+        {line,Addr,NewLine} ->
+            D = parse_line(Addr, NewLine, DecodeOpts, D0),
+            do_deref_ptr(Lookup, Line, DecodeOpts, D)
     end.
 
 get_hex(L) ->
diff --git a/lib/observer/test/crashdump_helper.erl b/lib/observer/test/crashdump_helper.erl
index 145ff56b71..d8f4e046ae 100644
--- a/lib/observer/test/crashdump_helper.erl
+++ b/lib/observer/test/crashdump_helper.erl
@@ -142,4 +142,23 @@ create_maps() ->
     Map3 = lists:foldl(fun(I, A) ->
                                A#{I=>I*I}
                        end, Map2, lists:seq(-10, 0)),
-    #{a=>Map0,b=>Map1,c=>Map2,d=>Map3,e=>#{}}.
+    #{a=>Map0,b=>Map1,c=>Map2,d=>Map3,e=>#{},literal=>literal_map()}.
+
+literal_map() ->
+    %% A literal map such as the one below will produce a heap dump
+    %% like this:
+    %%
+    %%   Address1:t4:H<Address3>,H<Address4>,H<Address5>,H<Address6>
+    %%   Address2:Mf4:H<Adress1>:I1,I2,I3,I4
+    %%   Address3: ...  % "one"
+    %%   Address4: ...  % "two"
+    %%   Address5: ...  % "three"
+    %%   Address6: ...  % "four"
+    %%
+    %% The map cannot be reconstructed in a single sequential pass.
+    %%
+    %% To reconstruct the map, first the string keys "one"
+    %% through "four" must be reconstructed, then the tuple at
+    %% Adress1, then the map at Address2.
+
+    #{"one"=>1,"two"=>2,"three"=>3,"four"=>4}.
-- 
2.16.4