From d1f840eb601163770476b869d0e5acf99e6ade89 Mon Sep 17 00:00:00 2001 From: ggurdin <46800240+ggurdin@users.noreply.github.com> Date: Mon, 29 Sep 2025 16:00:16 -0400 Subject: [PATCH] 4181 japanese words grouped when should be separate (#4184) * fix: only continue picking up adjacent tokens while they are punctuation * uncomment commented out code --- lib/pangea/message_token_text/tokens_util.dart | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/lib/pangea/message_token_text/tokens_util.dart b/lib/pangea/message_token_text/tokens_util.dart index d0959feca..b49c1f464 100644 --- a/lib/pangea/message_token_text/tokens_util.dart +++ b/lib/pangea/message_token_text/tokens_util.dart @@ -35,6 +35,12 @@ class TokenPosition { required this.startIndex, required this.endIndex, }); + + Map toJson() => { + 'token': token?.toJson(), + 'startIndex': startIndex, + 'endIndex': endIndex, + }; } class TokensUtil { @@ -157,10 +163,14 @@ class TokensUtil { final int startIndex = i; if (isPunct || nextIsPunct) { - while (nextToken != null && currentToken?.end == nextToken.start) { + bool punctPickup = true; + while (nextToken != null && + currentToken?.end == nextToken.start && + punctPickup) { i++; currentToken = nextToken; nextToken = i < tokens.length - 1 ? tokens[i + 1] : null; + punctPickup = nextToken?.pos == 'PUNCT'; } }