4181 Japanese words grouped when they should be separate (#4184)

* fix: only continue picking up adjacent tokens while they are punctuation

* uncomment commented-out code
This commit is contained in:
ggurdin 2025-09-29 16:00:16 -04:00 committed by GitHub
parent b159bfe1c9
commit d1f840eb60
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@@ -35,6 +35,12 @@ class TokenPosition {
required this.startIndex,
required this.endIndex,
});
/// Serializes this position to a JSON-compatible map.
///
/// The `'token'` entry holds the result of `token?.toJson()`, so it is `null`
/// when [token] is null. `'startIndex'` and `'endIndex'` are copied through
/// unchanged.
Map<String, dynamic> toJson() {
  return <String, dynamic>{
    'token': token?.toJson(),
    'startIndex': startIndex,
    'endIndex': endIndex,
  };
}
}
class TokensUtil {
@@ -157,10 +163,14 @@ class TokensUtil {
final int startIndex = i;
if (isPunct || nextIsPunct) {
while (nextToken != null && currentToken?.end == nextToken.start) {
bool punctPickup = true;
while (nextToken != null &&
currentToken?.end == nextToken.start &&
punctPickup) {
i++;
currentToken = nextToken;
nextToken = i < tokens.length - 1 ? tokens[i + 1] : null;
punctPickup = nextToken?.pos == 'PUNCT';
}
}