File backport-rebuild-surrounding-state.diff of Package fcitx5-unikey
diff --git a/src/unikey-im.cpp b/src/unikey-im.cpp
index 65dc5cf..b7f9421 100644
--- a/src/unikey-im.cpp
+++ b/src/unikey-im.cpp
@@ -35,10 +35,7 @@ static const unsigned int Unikey_OC[] = {
static constexpr unsigned int NUM_OUTPUTCHARSET = FCITX_ARRAY_SIZE(Unikey_OC);
static_assert(NUM_OUTPUTCHARSET == UkConvI18NAnnotation::enumLength);
-static const unsigned char WordBreakSyms[] = {
- ',', ';', ':', '.', '\"', '\'', '!', '?', ' ', '<', '>',
- '=', '+', '-', '*', '/', '\\', '_', '~', '`', '@', '#',
- '$', '%', '^', '&', '(', ')', '{', '}', '[', ']', '|'};
+// Returns true when c is a member of the shared WordBreakSyms set.
+static bool isWordBreakSym(unsigned char c) {
+    return WordBreakSyms.find(c) != WordBreakSyms.end();
+}
static bool isWordAutoCommit(unsigned char c) {
static const std::unordered_set<unsigned char> WordAutoCommit = {
@@ -49,6 +46,18 @@ static bool isWordAutoCommit(unsigned char c) {
return WordAutoCommit.count(c);
}
+// Returns true when c equals one of the first vnl_lastChar entries of the
+// UnicodeComposite table.
+static bool isVniChar(uint32_t c) {
+    // The lookup set is a function-local static, so it is filled only once.
+    static const std::unordered_set<uint32_t> knownChars = [] {
+        std::unordered_set<uint32_t> table;
+        int idx = 0;
+        while (idx < vnl_lastChar) {
+            table.insert(UnicodeComposite[idx]);
+            ++idx;
+        }
+        return table;
+    }();
+
+    return knownChars.find(c) != knownChars.end();
+}
+
// code from x-unikey, for convert charset that not is XUtf-8
int latinToUtf(unsigned char *dst, const unsigned char *src, int inSize,
int *pOutSize) {
@@ -145,7 +154,10 @@ public:
return;
}
- if (!*engine_->config().surroundingText) {
+ // Rebuilding state from the surrounding text only makes sense when the
+ // output charset is UTF-8, so bail out otherwise.
+ if (!*engine_->config().surroundingText ||
+ *engine_->config().oc != UkConv::XUTF8) {
return;
}
@@ -157,7 +169,7 @@ public:
!ic_->surroundingText().isValid()) {
return;
}
- // We need text before the cursor.
+ // We need the character before the cursor.
const auto &text = ic_->surroundingText().text();
auto cursor = ic_->surroundingText().cursor();
auto length = utf8::lengthValidated(text);
@@ -175,22 +187,43 @@ public:
lastCharBeforeCursor == utf8::NOT_ENOUGH_SPACE) {
return;
}
+
+ const auto isValidStateCharacter = [](char c) {
+ return isWordAutoCommit(c) && !charutils::isdigit(c);
+ };
+
if (std::distance(start, end) != 1 ||
- !isWordAutoCommit(lastCharBeforeCursor) ||
- charutils::isdigit(lastCharBeforeCursor)) {
+ !isValidStateCharacter(lastCharBeforeCursor)) {
return;
}
// Reverse search for word auto commit.
// all char for isWordAutoCommit == true would be ascii.
- while (start != text.begin() && isWordAutoCommit(*start) &&
- !charutils::isdigit(lastCharBeforeCursor) &&
+ while (start != text.begin() && isValidStateCharacter(*start) &&
std::distance(start, end) < MAX_CONTEXT_SIZE) {
--start;
}
+
+ // The loop stops either on an invalid character or at the earliest
+ // position it is allowed to reach. In the former case, step forward by
+ // one so that start points at the first valid character.
+ if (!isValidStateCharacter(*start)) {
+ ++start;
+ }
+
+ assert(isValidStateCharacter(*start) && start >= text.begin());
+
+ // Do nothing if the matched text is only the tail of a longer word.
+ if (start != text.begin()) {
+ auto chr = utf8::getLastChar(text.begin(), start);
+ if (isVniChar(chr)) {
+ return;
+ }
+ }
+
FCITX_UNIKEY_DEBUG()
- << "Rebuild surrounding with: "
- << std::string_view(&*start, std::distance(start, end));
+ << "Rebuild surrounding with: \""
+ << std::string_view(&*start, std::distance(start, end)) << "\"";
for (; start != end; ++start) {
uic_.putChar(*start);
autoCommit_ = true;
@@ -463,11 +496,9 @@ void UnikeyState::preedit(KeyEvent &keyEvent) {
// commit string: if need
if (!preeditStr_.empty()) {
- for (auto wordBreakSym : WordBreakSyms) {
- if (wordBreakSym == preeditStr_.back() && wordBreakSym == sym) {
- commit();
- return keyEvent.filterAndAccept();
- }
+ if (preeditStr_.back() == sym && isWordBreakSym(sym)) {
+ commit();
+ return keyEvent.filterAndAccept();
}
}
// end commit string
diff --git a/unikey/inputproc.cpp b/unikey/inputproc.cpp
index 1e6db4a..cafa419 100644
--- a/unikey/inputproc.cpp
+++ b/unikey/inputproc.cpp
@@ -4,7 +4,9 @@
* SPDX-License-Identifier: LGPL-2.0-or-later
*/
#include "inputproc.h"
+#include <array>
#include <iostream>
+#include <unordered_set>
using namespace std;
@@ -15,7 +17,7 @@ unsigned char WordBreakSyms[] = {
'_', '~', '`', '@', '#', '$', '%', '^', '&', '(', ')', '{', '}', '[', ']'};
*/
-unsigned char WordBreakSyms[] = {
+// Symbols that SetupInputClassifierTable() maps to ukcWordBreak.
+const std::unordered_set<unsigned char> WordBreakSyms = {
',', ';', ':', '.', '\"', '\'', '!', '?', ' ', '<',
'>', '=', '+', '-', '*', '/', '\\', '_', '@', '#',
'$', '%', '&', '(', ')', '{', '}', '[', ']', '|'}; // we excluded ~, `, ^
@@ -155,9 +157,8 @@ void SetupInputClassifierTable() {
UkcMap[(unsigned char)'w'] = ukcNonVn;
UkcMap[(unsigned char)'W'] = ukcNonVn;
- int count = sizeof(WordBreakSyms) / sizeof(unsigned char);
- for (i = 0; i < count; i++)
- UkcMap[WordBreakSyms[i]] = ukcWordBreak;
+ for (auto wordBreakSym : WordBreakSyms)
+ UkcMap[wordBreakSym] = ukcWordBreak;
// Calculate IsoVnLexiMap
for (i = 0; i < 256; i++) {
diff --git a/unikey/inputproc.h b/unikey/inputproc.h
index ae52a76..bf1311f 100644
--- a/unikey/inputproc.h
+++ b/unikey/inputproc.h
@@ -8,6 +8,7 @@
#include "keycons.h"
#include "vnlexi.h"
+#include <unordered_set>
#if defined(_WIN32)
#define DllExport __declspec(dllexport)
@@ -107,4 +108,6 @@ inline VnLexiName IsoToVnLexi(unsigned int keyCode) {
return (keyCode >= 256) ? vnl_nonVnChar : IsoVnLexiMap[keyCode];
}
+// Word-break symbol set, declared here so other translation units can use
+// it; the definition lives in inputproc.cpp.
+extern const std::unordered_set<unsigned char> WordBreakSyms;
+
#endif