1439 simplify accent and punctuation correction (#1511)

* feat: initial work to normalize error spans to reduce calls to span_details

* feat: if step is normalization error, don't add to choreo record so uses are saved as WA

* fix: add back null check
This commit is contained in:
ggurdin 2025-01-27 16:05:31 -05:00 committed by GitHub
parent 632384af03
commit cb98328adf
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 89 additions and 13 deletions

View file

@ -383,12 +383,17 @@ class Choreographer {
igc.igcTextData!.matches[matchIndex].match.choices![choiceIndex]
.selected = true;
final isNormalizationError =
igc.spanDataController.isNormalizationError(matchIndex);
//if it's the right choice, replace in text
choreoRecord.addRecord(
_textController.text,
match: igc.igcTextData!.matches[matchIndex].copyWith
..status = PangeaMatchStatus.accepted,
);
if (!isNormalizationError) {
choreoRecord.addRecord(
_textController.text,
match: igc.igcTextData!.matches[matchIndex].copyWith
..status = PangeaMatchStatus.accepted,
);
}
igc.igcTextData!.acceptReplacement(
matchIndex,
@ -442,10 +447,16 @@ class Choreographer {
igc.onIgnoreMatch(igc.igcTextData!.matches[matchIndex]);
igc.igcTextData!.matches[matchIndex].status = PangeaMatchStatus.ignored;
choreoRecord.addRecord(
_textController.text,
match: igc.igcTextData!.matches[matchIndex],
);
final isNormalizationError =
igc.spanDataController.isNormalizationError(matchIndex);
if (!isNormalizationError) {
choreoRecord.addRecord(
_textController.text,
match: igc.igcTextData!.matches[matchIndex],
);
}
igc.igcTextData!.matches.removeAt(matchIndex);
} catch (err, stack) {

View file

@ -3,9 +3,12 @@ import 'dart:developer';
import 'package:flutter/foundation.dart';
import 'package:collection/collection.dart';
import 'package:fluffychat/pangea/choreographer/controllers/choreographer.dart';
import 'package:fluffychat/pangea/choreographer/models/span_data.dart';
import 'package:fluffychat/pangea/choreographer/repo/span_data_repo.dart';
import 'package:fluffychat/pangea/choreographer/utils/normalize_text.dart';
import 'package:fluffychat/pangea/common/utils/error_handler.dart';
class _SpanDetailsCacheItem {
@ -36,20 +39,44 @@ class SpanDataController {
_cacheClearTimer?.cancel();
}
Future<void> getSpanDetails(int matchIndex) async {
SpanData? _getSpan(int matchIndex) {
if (choreographer.igc.igcTextData == null ||
choreographer.igc.igcTextData!.matches.isEmpty ||
matchIndex < 0 ||
matchIndex >= choreographer.igc.igcTextData!.matches.length) {
debugger(when: kDebugMode);
return;
return null;
}
/// Retrieves the span data from the `igcTextData` matches at the specified `matchIndex`.
/// Creates a `SpanDetailsRepoReqAndRes` object with the retrieved span data and other parameters.
/// Generates a cache key based on the created `SpanDetailsRepoReqAndRes` object.
final SpanData span =
choreographer.igc.igcTextData!.matches[matchIndex].match;
return choreographer.igc.igcTextData!.matches[matchIndex].match;
}
/// Whether the match at [matchIndex] differs from its best correction only
/// in ways that [normalizeString] erases.
///
/// Returns `false` when no span can be resolved for [matchIndex], when the
/// span has no choice flagged as the best correction, or when the span's
/// offset/length do not describe a valid range inside its full text.
bool isNormalizationError(int matchIndex) {
  final span = _getSpan(matchIndex);
  if (span == null) return false;

  final correctChoice = span.choices
      ?.firstWhereOrNull((c) => c.isBestCorrection)
      ?.value;
  // Without a best correction there is nothing to compare against, so skip
  // slicing the text entirely.
  if (correctChoice == null) return false;

  // Guard the range first: `substring` throws a RangeError on out-of-bounds
  // indices, and callers invoke this in the middle of accepting/ignoring a
  // match, where an exception would abort the rest of that flow.
  final start = span.offset;
  final end = start + span.length;
  if (start < 0 || end < start || end > span.fullText.length) return false;

  final errorSpan = span.fullText.substring(start, end);
  return normalizeString(correctChoice) == normalizeString(errorSpan);
}
Future<void> getSpanDetails(int matchIndex) async {
final SpanData? span = _getSpan(matchIndex);
if (span == null || isNormalizationError(matchIndex)) return;
final req = SpanDetailsRepoReqAndRes(
userL1: choreographer.l1LangCode!,
userL2: choreographer.l2LangCode!,

View file

@ -0,0 +1,29 @@
import 'package:diacritic/diacritic.dart';
import 'package:fluffychat/pangea/common/utils/error_handler.dart';
/// Normalizes [input] so that two strings differing only in accents,
/// punctuation, letter case, or whitespace compare equal.
///
/// Letters and digits of every script are preserved; only diacritics,
/// punctuation/symbols, case, and redundant whitespace are removed. If
/// nothing is left after normalization (for example, the input was only
/// punctuation), or if normalization throws, [input] is returned unchanged.
String normalizeString(String input) {
  try {
    // Step 1: Remove diacritics (accents). Also drop combining diacritical
    // marks (U+0300–U+036F) so that decomposed input such as "e\u0301"
    // normalizes the same way as its precomposed form.
    String normalized =
        removeDiacritics(input).replaceAll(RegExp(r'[\u0300-\u036f]'), '');

    // Step 2: Remove punctuation and symbols. Unicode property classes are
    // used instead of the ASCII-only `\w` so that non-Latin letters, marks
    // and digits are kept; otherwise different words written in e.g.
    // Cyrillic or CJK would collapse to the same (or an empty) string.
    normalized = normalized.replaceAll(
      RegExp(r'[^\p{L}\p{M}\p{N}_\s]', unicode: true),
      '',
    );

    // Step 3: Convert to lowercase
    normalized = normalized.toLowerCase();

    // Step 4: Trim and normalize whitespace
    normalized = normalized.replaceAll(RegExp(r'\s+'), ' ').trim();

    // Fall back to the raw input when everything was stripped, so callers
    // never compare two empty strings as "equal".
    return normalized.isEmpty ? input : normalized;
  } catch (e, s) {
    ErrorHandler.logError(
      e: e,
      s: s,
      data: {'input': input},
    );
    return input;
  }
}

View file

@ -410,6 +410,14 @@ packages:
url: "https://pub.dev"
source: hosted
version: "7.0.1"
diacritic:
dependency: "direct main"
description:
name: diacritic
sha256: "12981945ec38931748836cd76f2b38773118d0baef3c68404bdfde9566147876"
url: "https://pub.dev"
source: hosted
version: "0.1.6"
dropdown_button2:
dependency: "direct main"
description:

View file

@ -29,6 +29,7 @@ dependencies:
# Pangea#
desktop_notifications: ^0.6.3
device_info_plus: ^10.0.1
diacritic: ^0.1.6
dynamic_color: ^1.7.0
emoji_picker_flutter: ^3.1.0
emojis: ^0.9.9