File 0243-kernel-fix-bug-with-composing-unicode-character.patch of Package erlang
From 126361cc962eed6a00032c9943df78ea56365f08 Mon Sep 17 00:00:00 2001
From: frazze-jobb <frazze@erlang.org>
Date: Wed, 16 Oct 2024 14:37:49 +0200
Subject: [PATCH] kernel: fix bug with composing unicode character
When the first charaacter is a composing unicode character like
ZWNJ ZWJ characters or U+0308 it binds with the last character
that is part of the prompt, and creates a grapheme cluster.
In this case we need to split up the grapheme cluster and remove
the composing character and keep the character part of the prompt.
---
lib/kernel/src/prim_tty.erl | 32 ++++++++++++------
lib/kernel/test/interactive_shell_SUITE.erl | 36 +++++++++++++++++++++
2 files changed, 59 insertions(+), 9 deletions(-)
diff --git a/lib/kernel/src/prim_tty.erl b/lib/kernel/src/prim_tty.erl
index 4c08fe9f4c..f7ff75e191 100644
--- a/lib/kernel/src/prim_tty.erl
+++ b/lib/kernel/src/prim_tty.erl
@@ -814,13 +814,18 @@ handle_request(State = #state{ unicode = U, cols = W }, {delete, N}) when N < 0
BBCols = cols(State#state.buffer_before, U),
BACols = cols(State#state.buffer_after, U),
NewBBCols = cols(NewBB, U),
- Output = [move_cursor(State, NewBBCols + DelCols, NewBBCols),
- encode(State#state.buffer_after,U),
- lists:duplicate(DelCols, $\s),
- xnfix(State, NewBBCols + BACols + DelCols),
- move_cursor(State, NewBBCols + BACols + DelCols, NewBBCols)],
+ %% DelCols is 0 only when we are removing a ZWJ or a ZWNJ that is the first character of
+ %% the user buffer. We remove the character from the buffer, but we don't output anything
+ Output = if
+ DelCols =:= 0 -> "";
+ true -> [move_cursor(State, NewBBCols + DelCols, NewBBCols),
+ encode(State#state.buffer_after,U),
+ lists:duplicate(DelCols, $\s),
+ xnfix(State, NewBBCols + BACols + DelCols),
+ move_cursor(State, NewBBCols + BACols + DelCols, NewBBCols)]
+ end,
NewState0 = State#state{ buffer_before = NewBB },
- if State#state.lines_after =/= [], (BBCols+BACols+N) rem W =:= 0 ->
+ if DelCols =/= 0, State#state.lines_after =/= [], (BBCols+BACols+N) rem W =:= 0 ->
{Delete, _} = handle_request(State, delete_line),
{Redraw, NewState1} = handle_request(NewState0, redraw_prompt_pre_deleted),
{[Delete, Redraw], NewState1};
@@ -972,9 +977,15 @@ split_cols(_N, [], Acc, Chars, Cols, _Unicode) ->
{Chars, Cols, Acc, []};
split_cols(N, [Char | T], Acc, Cnt, Cols, Unicode) when is_integer(Char) ->
split_cols(N - npwcwidth(Char), T, [Char | Acc], Cnt + 1, Cols + npwcwidth(Char, Unicode), Unicode);
-split_cols(N, [Chars | T], Acc, Cnt, Cols, Unicode) when is_list(Chars) ->
- split_cols(N - length(Chars), T, [Chars | Acc],
- Cnt + length(Chars), Cols + cols(Chars, Unicode), Unicode).
+split_cols(N, [GC|T], Acc, Cnt, Cols, Unicode) when is_list(GC) ->
+ %% We have to remove parts of the grapheme cluster
+ CGC = cols(GC, Unicode),
+ if CGC > N ->
+ {CntList2, ColsList2, List2, List1} = split_cols(N, GC, Unicode),
+ split_cols(N-ColsList2, [List1|T], List2 ++ Acc, Cnt+CntList2, Cols+ColsList2, Unicode);
+ true ->
+ split_cols(N-CGC, T, GC ++ Acc, Cnt+length(GC), Cols+CGC, Unicode)
+ end.
%% Split the buffer after N logical characters returning
%% the number of real characters deleted and the column length
@@ -991,6 +1002,9 @@ split(_N, [], Acc, Chars, Cols, _Unicode) ->
{Chars, Cols, Acc, []};
split(N, [Char | T], Acc, Cnt, Cols, Unicode) when is_integer(Char) ->
split(N - 1, T, [Char | Acc], Cnt + 1, Cols + npwcwidth(Char, Unicode), Unicode);
+split(N, [GC|T], Acc, Cnt, Cols, Unicode) when is_list(GC), N < length(GC) ->
+ {NumL2, ColsL2, List2, List1} = split(N, GC, Unicode),
+ split(N-NumL2, List1 ++ T, List2 ++ Acc, Cnt+NumL2, Cols+ColsL2, Unicode);
split(N, [Chars | T], Acc, Cnt, Cols, Unicode) when is_list(Chars) ->
split(N - length(Chars), T, [Chars | Acc],
Cnt + length(Chars), Cols + cols(Chars, Unicode), Unicode);
diff --git a/lib/kernel/test/interactive_shell_SUITE.erl b/lib/kernel/test/interactive_shell_SUITE.erl
index 8cefbb683c..e7bf5bdc00 100644
--- a/lib/kernel/test/interactive_shell_SUITE.erl
+++ b/lib/kernel/test/interactive_shell_SUITE.erl
@@ -51,6 +51,7 @@
shell_navigation/1, shell_multiline_navigation/1, shell_multiline_prompt/1,
shell_xnfix/1, shell_delete/1,
shell_transpose/1, shell_search/1, shell_insert/1,
+ shell_combining_unicode/1,
shell_update_window/1, shell_small_window_multiline_navigation/1, shell_huge_input/1,
shell_invalid_unicode/1, shell_support_ansi_input/1,
shell_invalid_ansi/1, shell_suspend/1, shell_full_queue/1,
@@ -136,6 +137,7 @@ groups() ->
{tty_tests, [parallel],
[shell_navigation, shell_multiline_navigation, shell_multiline_prompt,
shell_xnfix, shell_delete, shell_format, shell_help,
+ shell_combining_unicode,
shell_transpose, shell_search, shell_insert,
shell_update_window, shell_small_window_multiline_navigation, shell_huge_input,
shell_support_ansi_input,
@@ -921,6 +923,40 @@ shell_search(C) ->
ok
end.
+shell_combining_unicode(Config) ->
+ %% Tests that its possible to delete a combining unicode character as
+ %% the first character of the input line.
+ Term = start_tty(Config),
+ X = 0,
+ check_location(Term, {X,0}),
+ %% COMBINING DIAERESIS, ZWNJ, ZWJ
+ CombiningUnicode = [776, 8204, 8205],
+ try
+ [
+ begin
+ send_tty(Term,[J]),
+ send_tty(Term,"BSpace"),
+ check_location(Term, {X,0}),
+ send_tty(Term,"BSpace"),
+ check_location(Term, {X,0}),
+ send_tty(Term,[J,$a]),
+ send_tty(Term,"BSpace"),
+ check_location(Term, {X,0}),
+ send_tty(Term,"BSpace"),
+ check_location(Term, {X,0}),
+ send_tty(Term,[$a,J]),
+ send_tty(Term,"BSpace"),
+ check_location(Term, {X,0}),
+ send_tty(Term,"BSpace"),
+ check_location(Term, {X,0}),
+ send_tty(Term,[$",$a,J,$b,$",$.,10]),
+ check_location(Term, {X,0})
+ end || J <- CombiningUnicode],
+ ok
+ after
+ stop_tty(Term)
+ end.
+
shell_insert(Config) ->
Term = start_tty(Config),
--
2.43.0