fix: group adjacent punctuation tokens with content tokens to prevent line breaks, added token positions cache (#3713)
This commit is contained in:
parent
fd617f296f
commit
fe7e5385e8
3 changed files with 104 additions and 9 deletions
|
|
@ -14,6 +14,7 @@ import 'package:fluffychat/pages/chat/chat.dart';
|
|||
import 'package:fluffychat/pangea/events/event_wrappers/pangea_message_event.dart';
|
||||
import 'package:fluffychat/pangea/events/models/pangea_token_model.dart';
|
||||
import 'package:fluffychat/pangea/message_token_text/message_token_button.dart';
|
||||
import 'package:fluffychat/pangea/message_token_text/token_position_model.dart';
|
||||
import 'package:fluffychat/pangea/toolbar/enums/reading_assistance_mode_enum.dart';
|
||||
import 'package:fluffychat/pangea/toolbar/utils/token_rendering_util.dart';
|
||||
import 'package:fluffychat/pangea/toolbar/widgets/message_selection_overlay.dart';
|
||||
|
|
@ -157,7 +158,7 @@ class HtmlMessage extends StatelessWidget {
|
|||
pangeaMessageEvent?.messageDisplayRepresentation?.tokens
|
||||
?.where(
|
||||
(t) =>
|
||||
!["PUNCT", "SYM"].contains(t.pos) &&
|
||||
!["SYM"].contains(t.pos) &&
|
||||
!t.lemma.text.contains(RegExp(r'[0-9]')) &&
|
||||
t.lemma.text.length <= 50,
|
||||
)
|
||||
|
|
@ -209,17 +210,25 @@ class HtmlMessage extends StatelessWidget {
|
|||
}
|
||||
|
||||
int position = 0;
|
||||
for (final PangeaToken token in tokens ?? []) {
|
||||
final String tokenText = token.text.content;
|
||||
final tokenPositions = tokens != null
|
||||
? TokensUtil.getAdjacentTokenPositions(event.eventId, tokens!)
|
||||
: [];
|
||||
|
||||
for (final TokenPosition tokenPosition in tokenPositions) {
|
||||
final String tokenSpanText = tokens!
|
||||
.sublist(tokenPosition.startIndex, tokenPosition.endIndex + 1)
|
||||
.map((t) => t.text.content)
|
||||
.join();
|
||||
|
||||
final substringIndex = result.indexWhere(
|
||||
(string) =>
|
||||
string.contains(tokenText) &&
|
||||
string.contains(tokenSpanText) &&
|
||||
!(string.startsWith('<') && string.endsWith('>')),
|
||||
position,
|
||||
);
|
||||
|
||||
if (substringIndex == -1) continue;
|
||||
int tokenIndex = result[substringIndex].indexOf(tokenText);
|
||||
int tokenIndex = result[substringIndex].indexOf(tokenSpanText);
|
||||
if (tokenIndex == -1) continue;
|
||||
|
||||
final beforeSubstring = result[substringIndex].substring(0, tokenIndex);
|
||||
|
|
@ -227,7 +236,7 @@ class HtmlMessage extends StatelessWidget {
|
|||
tokenIndex = beforeSubstring.characters.length;
|
||||
}
|
||||
|
||||
final int tokenLength = tokenText.characters.length;
|
||||
final int tokenLength = tokenSpanText.characters.length;
|
||||
final before =
|
||||
result[substringIndex].characters.take(tokenIndex).toString();
|
||||
final after = result[substringIndex]
|
||||
|
|
@ -237,7 +246,7 @@ class HtmlMessage extends StatelessWidget {
|
|||
|
||||
result.replaceRange(substringIndex, substringIndex + 1, [
|
||||
if (before.isNotEmpty) before,
|
||||
'<token offset="${token.text.offset}" length="${token.text.length}">$tokenText</token>',
|
||||
'<token offset="${tokenPosition.token!.text.offset}" length="${tokenPosition.token!.text.length}">$tokenSpanText</token>',
|
||||
if (after.isNotEmpty) after,
|
||||
]);
|
||||
|
||||
|
|
|
|||
|
|
@ -13,7 +13,82 @@ class TokenPosition {
|
|||
}
|
||||
|
||||
class TokensUtil {
|
||||
static List<TokenPosition> getTokenPositions(
|
||||
/// A cache of calculated adjacent token positions, keyed by event ID.
|
||||
static final Map<String, _TokenPositionCacheItem> _tokenPositionCache = {};
|
||||
|
||||
/// Maximum age of a cache entry. [_getCachedTokenPositions] treats an entry
/// whose timestamp is older than this as expired and removes it on lookup.
static const Duration _cacheDuration = Duration(minutes: 1);
|
||||
|
||||
/// Looks up the cached token positions for [eventID].
///
/// Returns `null` when nothing is cached for the event, or when the cached
/// entry is older than [_cacheDuration]. A stale entry is removed from the
/// cache before `null` is returned.
static List<TokenPosition>? _getCachedTokenPositions(String eventID) {
  final entry = _tokenPositionCache[eventID];
  if (entry == null) return null;

  // An entry is fresh as long as its timestamp is not before the cutoff.
  final expiryCutoff = DateTime.now().subtract(_cacheDuration);
  final isFresh = !entry.timestamp.isBefore(expiryCutoff);
  if (isFresh) return entry.positions;

  _tokenPositionCache.remove(eventID);
  return null;
}
|
||||
|
||||
/// Stores [positions] in the cache under [eventID], stamped with the current
/// time.
///
/// Expired entries are purged before the new one is written. Lookups only
/// evict the single key being read, so without this sweep the entries for
/// events that are never requested again would remain in the static map for
/// the lifetime of the process.
static void _setCachedTokenPositions(
  String eventID,
  List<TokenPosition> positions,
) {
  final now = DateTime.now();
  final expiryCutoff = now.subtract(_cacheDuration);

  // Same expiry rule as the lookup path: older than the cutoff means stale.
  _tokenPositionCache.removeWhere(
    (_, item) => item.timestamp.isBefore(expiryCutoff),
  );

  _tokenPositionCache[eventID] = _TokenPositionCacheItem(positions, now);
}
|
||||
|
||||
/// Groups each content token with the punctuation tokens that touch it and
/// returns one [TokenPosition] per group.
///
/// A group is started when the current token, or the token right after it,
/// has `pos == 'PUNCT'`; it then extends over every following token whose
/// `start` equals the previous token's `end`. Groups consisting solely of
/// punctuation are dropped, so the returned list may leave gaps and should
/// not be used to fully reconstruct the original message.
///
/// `startIndex` and `endIndex` of each position are inclusive indices into
/// [tokens]; `token` is the first non-punctuation token of the group.
/// Results are cached per [eventID] and a cached list is returned as-is.
static List<TokenPosition> getAdjacentTokenPositions(
  String eventID,
  List<PangeaToken> tokens,
) {
  final cachedPositions = _getCachedTokenPositions(eventID);
  if (cachedPositions != null) return cachedPositions;

  bool isPunctuation(PangeaToken t) => t.pos == 'PUNCT';

  final result = <TokenPosition>[];
  int cursor = 0;
  while (cursor < tokens.length) {
    final groupStart = cursor;
    int groupEnd = groupStart;

    final hasFollower = groupStart + 1 < tokens.length;
    final touchesPunctuation = isPunctuation(tokens[groupStart]) ||
        (hasFollower && isPunctuation(tokens[groupStart + 1]));

    if (touchesPunctuation) {
      // Absorb every following token that starts exactly where the previous
      // one ends (no whitespace or other gap between them).
      while (groupEnd + 1 < tokens.length &&
          tokens[groupEnd].end == tokens[groupEnd + 1].start) {
        groupEnd++;
      }
    }

    // Resume scanning after this group regardless of whether it is kept.
    cursor = groupEnd + 1;

    final group = tokens.sublist(groupStart, groupEnd + 1);
    final anchorIndex = group.indexWhere((t) => !isPunctuation(t));
    if (anchorIndex == -1) continue; // punctuation-only group

    result.add(
      TokenPosition(
        token: group[anchorIndex],
        startIndex: groupStart,
        endIndex: groupEnd,
      ),
    );
  }

  _setCachedTokenPositions(eventID, result);
  return result;
}
|
||||
|
||||
/// Given a list of tokens, reconstructs an original message, including gaps for non-token elements.
|
||||
static List<TokenPosition> getGlobalTokenPositions(
|
||||
List<PangeaToken> tokens,
|
||||
) {
|
||||
final List<TokenPosition> tokenPositions = [];
|
||||
|
|
@ -83,3 +158,13 @@ class TokensUtil {
|
|||
return tokenPositions;
|
||||
}
|
||||
}
|
||||
|
||||
/// A cache entry pairing computed token positions with the time they were
/// stored.
class _TokenPositionCacheItem {
  _TokenPositionCacheItem(this.positions, this.timestamp);

  /// The cached token positions.
  final List<TokenPosition> positions;

  /// When this entry was created; compared against the cache duration to
  /// decide whether the entry has expired.
  final DateTime timestamp;
}
|
||||
|
|
|
|||
|
|
@ -37,7 +37,8 @@ class SttTranscriptTokens extends StatelessWidget {
|
|||
textScaler: TextScaler.noScaling,
|
||||
text: TextSpan(
|
||||
style: style ?? DefaultTextStyle.of(context).style,
|
||||
children: TokensUtil.getTokenPositions(tokens).map((tokenPosition) {
|
||||
children:
|
||||
TokensUtil.getGlobalTokenPositions(tokens).map((tokenPosition) {
|
||||
final text = messageCharacters
|
||||
.skip(tokenPosition.startIndex)
|
||||
.take(tokenPosition.endIndex - tokenPosition.startIndex)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue