From dbeb36e463eee0737ee51008fc7aaf56f896237f Mon Sep 17 00:00:00 2001 From: Kelrap <99418823+Kelrap@users.noreply.github.com> Date: Thu, 31 Jul 2025 10:08:31 -0400 Subject: [PATCH] Optimize choreo (#3567) * Basic setup of choreo changes * Make unit test for basic bsdiff functionality * Create class for efficiently storing choreo step edits * Edited unit test to use ChoreoEdit instead of bsdiff * Edit ChoreoRecord/Step to use ChoreoEdit * Test and debug ChoreoRecord * Accounting for more edge cases * Remove empty string redundancy in choreo edit * Save originalText to record instead of step * Make originalText non-nullable * Make test run properly with non-nullable originalText change * make originalText final * Tweak ChoreoEdit for slightly improved efficiency * chore: fix how edits are constructed for choreo records * fix issue with manual changes not being accounted for in fromJSON --------- Co-authored-by: ggurdin Co-authored-by: ggurdin <46800240+ggurdin@users.noreply.github.com> --- .../controllers/choreographer.dart | 81 ++++++--- .../choreographer/models/choreo_edit.dart | 90 ++++++++++ .../choreographer/models/choreo_record.dart | 167 +++++++++++++++--- .../choreographer/models/span_data.dart | 6 + .../widgets/igc/pangea_text_controller.dart | 6 +- .../event_wrappers/pangea_message_event.dart | 7 +- test/pangea/choreo_record_test.dart | 123 +++++++++++++ 7 files changed, 422 insertions(+), 58 deletions(-) create mode 100644 lib/pangea/choreographer/models/choreo_edit.dart create mode 100644 test/pangea/choreo_record_test.dart diff --git a/lib/pangea/choreographer/controllers/choreographer.dart b/lib/pangea/choreographer/controllers/choreographer.dart index e456bdddf..f4ebb045e 100644 --- a/lib/pangea/choreographer/controllers/choreographer.dart +++ b/lib/pangea/choreographer/controllers/choreographer.dart @@ -48,7 +48,7 @@ class Choreographer { final int msBeforeIGCStart = 10000; Timer? debounceTimer; - ChoreoRecord choreoRecord = ChoreoRecord.newRecord; + ChoreoRecord? choreoRecord; // last checked by IGC or translation String? _lastChecked; ChoreoMode choreoMode = ChoreoMode.igc; @@ -146,7 +146,7 @@ class Choreographer { final message = chatController.sendController.text; final fakeEventId = chatController.sendFakeMessage(); final PangeaRepresentation? originalWritten = - choreoRecord.includedIT && translatedText != null + choreoRecord?.includedIT == true && translatedText != null ? PangeaRepresentation( langCode: l1LangCode ?? LanguageKeys.unknownLanguage, text: translatedText!, @@ -195,7 +195,7 @@ class Choreographer { "currentText": message, "l1LangCode": l1LangCode, "l2LangCode": l2LangCode, - "choreoRecord": choreoRecord.toJson(), + "choreoRecord": choreoRecord?.toJson(), }, ); } finally { @@ -218,6 +218,14 @@ class Choreographer { } } + void _initChoreoRecord() { + choreoRecord ??= ChoreoRecord( + originalText: textController.text, + choreoSteps: [], + openMatches: [], + ); + } + void onITStart(PangeaMatch itMatch) { if (!itMatch.isITStart) { throw Exception("this isn't an itStart match!"); @@ -227,8 +235,6 @@ class Choreographer { ITStartData(_textController.text, null), ); itMatch.status = PangeaMatchStatus.accepted; - - choreoRecord.addRecord(_textController.text, match: itMatch); translatedText = _textController.text; //PTODO - if totally in L1, save tokens, that's good stuff @@ -236,6 +242,9 @@ class Choreographer { igc.clear(); _textController.setSystemText("", EditType.itStart); + + _initChoreoRecord(); + choreoRecord!.addRecord(_textController.text, match: itMatch); } /// Handles any changes to the text input @@ -311,6 +320,7 @@ class Choreographer { } startLoading(); + _initChoreoRecord(); // if getting language assistance after finishing IT, // reset the itController @@ -342,7 +352,6 @@ class Choreographer { } void onITChoiceSelect(ITStep step) { - choreoRecord.addRecord(_textController.text, step: step); _textController.setSystemText( _textController.text + step.continuances[step.chosen!].text, step.continuances[step.chosen!].gold @@ -351,6 +360,10 @@ class Choreographer { ); _textController.selection = TextSelection.collapsed(offset: _textController.text.length); + + _initChoreoRecord(); + choreoRecord!.addRecord(_textController.text, step: step); + giveInputFocus(); } @@ -400,14 +413,8 @@ class Choreographer { final isNormalizationError = igc.spanDataController.isNormalizationError(matchIndex); - //if it's the right choice, replace in text - if (!isNormalizationError) { - choreoRecord.addRecord( - _textController.text, - match: igc.igcTextData!.matches[matchIndex].copyWith - ..status = PangeaMatchStatus.accepted, - ); - } + final match = igc.igcTextData!.matches[matchIndex].copyWith + ..status = PangeaMatchStatus.accepted; igc.igcTextData!.acceptReplacement( matchIndex, @@ -419,6 +426,15 @@ class Choreographer { EditType.igc, ); + //if it's the right choice, replace in text + if (!isNormalizationError) { + _initChoreoRecord(); + choreoRecord!.addRecord( + _textController.text, + match: match, + ); + } + MatrixState.pAnyState.closeOverlay(); setState(); } catch (err, stack) { @@ -442,7 +458,7 @@ class Choreographer { try { igc.igcTextData?.undoReplacement(match); - choreoRecord.choreoSteps.removeWhere( + choreoRecord?.choreoSteps.removeWhere( (step) => step.acceptedOrIgnoredMatch == match, ); @@ -466,15 +482,26 @@ class Choreographer { } void acceptNormalizationMatches() { + final List indices = []; for (int i = 0; i < igc.igcTextData!.matches.length; i++) { final isNormalizationError = igc.spanDataController.isNormalizationError(i); + if (isNormalizationError) indices.add(i); + } - if (!isNormalizationError) continue; - final match = igc.igcTextData!.matches[i]; + if (indices.isEmpty) return; + _initChoreoRecord(); + final matches = igc.igcTextData!.matches + .where( + (match) => indices.contains(igc.igcTextData!.matches.indexOf(match)), + ) + .toList(); + + for (final match in matches) { + final index = igc.igcTextData!.matches.indexOf(match); igc.igcTextData!.acceptReplacement( - i, + index, match.match.choices!.indexWhere( (c) => c.isBestCorrection, ), @@ -488,15 +515,15 @@ class Choreographer { .characters .length; - choreoRecord.addRecord( - _textController.text, - match: newMatch, - ); - _textController.setSystemText( igc.igcTextData!.originalInput, EditType.igc, ); + + choreoRecord!.addRecord( + currentText, + match: newMatch, + ); } } @@ -528,7 +555,8 @@ class Choreographer { igc.spanDataController.isNormalizationError(matchIndex); if (!isNormalizationError) { - choreoRecord.addRecord( + _initChoreoRecord(); + choreoRecord!.addRecord( _textController.text, match: igc.igcTextData!.matches[matchIndex], ); @@ -575,7 +603,7 @@ class Choreographer { _lastChecked = null; _timesClicked = 0; isFetching = false; - choreoRecord = ChoreoRecord.newRecord; + choreoRecord = null; translatedText = null; itController.clear(); igc.dispose(); @@ -583,7 +611,8 @@ class Choreographer { } Future onPaste(value) async { - choreoRecord.pastedStrings.add(value); + _initChoreoRecord(); + choreoRecord!.pastedStrings.add(value); } dispose() { diff --git a/lib/pangea/choreographer/models/choreo_edit.dart b/lib/pangea/choreographer/models/choreo_edit.dart new file mode 100644 index 000000000..04951e98d --- /dev/null +++ b/lib/pangea/choreographer/models/choreo_edit.dart @@ -0,0 +1,90 @@ +import 'dart:math'; + +/// Changes made to previous choreo step's text +/// Remove substring of length 'length', starting at position 'offset' +/// Then add String 'insert' at that position +class ChoreoEdit { + int offset = 0; + int length = 0; + String insert = ""; + + /// Normal constructor created from preexisting ChoreoEdit values + ChoreoEdit({ + required this.offset, + required this.length, + required this.insert, + }); + + /// Constructor that determines and saves + /// edits differentiating originalText and editedText + ChoreoEdit.fromText({ + required String originalText, + required String editedText, + }) { + if (originalText == editedText) { + // No changes, return empty edit + return; + } + + offset = _firstDifference(originalText, editedText); + length = _lastDifference(originalText, editedText) + 1 - offset; + insert = _insertion(originalText, editedText); + } + + factory ChoreoEdit.fromJson(Map json) { + return ChoreoEdit( + offset: json[_offsetKey], + length: json[_lengthKey], + insert: json[_insertKey], + ); + } + + static const _offsetKey = "offst_v2"; + static const _lengthKey = "lngth_v2"; + static const _insertKey = "insrt_v2"; + + Map toJson() { + final data = {}; + data[_offsetKey] = offset; + data[_lengthKey] = length; + data[_insertKey] = insert; + return data; + } + + /// Find index of first character where strings differ + int _firstDifference(String originalText, String editedText) { + var i = 0; + final minLength = min(originalText.length, editedText.length); + while (i < minLength && originalText[i] == editedText[i]) { + i++; + } + return i; + } + + /// Starting at the end of both text versions, + /// traverse backward until a non-matching char is found + int _lastDifference(String originalText, String editedText) { + var i = originalText.length - 1; + var j = editedText.length - 1; + while (min(i, j) >= offset && originalText[i] == editedText[j]) { + i--; + j--; + } + return i; + } + + /// Length of inserted text is the length of deleted text, + /// plus the difference in string length + /// If dif is -x and length of deleted text is x, + /// inserted text is empty string + String _insertion(String originalText, String editedText) { + final insertLength = length + (editedText.length - originalText.length); + return editedText.substring(offset, offset + insertLength); + } + + /// Given the original string, use offset, length, and insert + /// to find the edited version of the string + String editedText(String originalText) { + return originalText.replaceRange(offset, offset + length, insert); + } +} diff --git a/lib/pangea/choreographer/models/choreo_record.dart b/lib/pangea/choreographer/models/choreo_record.dart index 8adb25316..da34f02b7 100644 --- a/lib/pangea/choreographer/models/choreo_record.dart +++ b/lib/pangea/choreographer/models/choreo_record.dart @@ -1,6 +1,8 @@ import 'dart:convert'; +import 'package:fluffychat/pangea/choreographer/models/choreo_edit.dart'; import 'package:fluffychat/pangea/choreographer/models/pangea_match_model.dart'; +import 'package:fluffychat/pangea/choreographer/models/span_data.dart'; import 'it_step.dart'; /// this class lives within a [PangeaIGCEvent] @@ -20,33 +22,120 @@ class ChoreoRecord { final Set pastedStrings = {}; + final String originalText; + ChoreoRecord({ required this.choreoSteps, required this.openMatches, - // required this.current, + required this.originalText, }); - factory ChoreoRecord.fromJson(Map json) { + factory ChoreoRecord.fromJson( + Map json, [ + String? defaultOriginalText, + ]) { final stepsRaw = json[_stepsKey]; - return ChoreoRecord( - choreoSteps: (jsonDecode(stepsRaw ?? "[]") as Iterable) - .map((e) { - return ChoreoRecordStep.fromJson(e); - }) + String? originalText = json[_originalTextKey]; + + List steps = []; + final stepContent = (jsonDecode(stepsRaw ?? "[]") as Iterable); + + if (stepContent.isNotEmpty && + originalText == null && + stepContent.first["txt"] is String) { + originalText = stepContent.first["txt"]; + } + + if (stepContent.every((step) => step["txt"] is! String)) { + steps = stepContent + .map((e) => ChoreoRecordStep.fromJson(e)) .toList() - .cast(), + .cast(); + } else { + String? currentEdit = originalText; + for (final content in stepContent) { + final String textBefore = content["txt"] ?? ""; + String textAfter = textBefore; + + currentEdit ??= textBefore; + + // typically, taking the original text and applying the edits from the choreo steps + // will yield a correct result, but it's possible the user manually changed the text + // between steps, so we need handle that by adding an extra step + if (textBefore != currentEdit) { + final edits = ChoreoEdit.fromText( + originalText: currentEdit, + editedText: textBefore, + ); + + steps.add(ChoreoRecordStep(edits: edits)); + currentEdit = textBefore; + } + + int offset = 0; + int length = 0; + String insert = ""; + + final step = ChoreoRecordStep.fromJson(content); + if (step.acceptedOrIgnoredMatch != null) { + final SpanData? match = step.acceptedOrIgnoredMatch?.match; + final correction = match?.bestChoice; + if (correction != null) { + offset = match!.offset; + length = match.length; + insert = correction.value; + } + } else if (step.itStep != null) { + final chosen = step.itStep!.chosenContinuance; + if (chosen != null) { + offset = (content["txt"] ?? "").length; + insert = chosen.text; + } + } + + if (textBefore.length - offset - length < 0) { + length = textBefore.length - offset; + } + + textAfter = textBefore.replaceRange( + offset, + offset + length, + insert, + ); + + final edits = ChoreoEdit.fromText( + originalText: currentEdit, + editedText: textAfter, + ); + + currentEdit = textAfter; + step.edits = edits; + steps.add(step); + } + } + + if (originalText == null && + (steps.isNotEmpty || defaultOriginalText == null)) { + throw Exception( + "originalText cannot be null, please provide a valid original text", + ); + } + + return ChoreoRecord( + choreoSteps: steps, + originalText: originalText ?? defaultOriginalText!, openMatches: (jsonDecode(json[_openMatchesKey] ?? "[]") as Iterable) .map((e) { return PangeaMatch.fromJson(e); }) .toList() .cast(), - // current: json[_currentKey], ); } static const _stepsKey = "stps"; static const _openMatchesKey = "mtchs"; + static const _originalTextKey = "ogtxt_v2"; // static const _currentKey = "crnt"; Map toJson() { @@ -54,23 +143,55 @@ class ChoreoRecord { data[_stepsKey] = jsonEncode(choreoSteps.map((e) => e.toJson()).toList()); data[_openMatchesKey] = jsonEncode(openMatches.map((e) => e.toJson()).toList()); + data[_originalTextKey] = originalText; // data[_currentKey] = current; return data; } - addRecord(String text, {PangeaMatch? match, ITStep? step}) { + void addRecord(String text, {PangeaMatch? match, ITStep? step}) { if (match != null && step != null) { throw Exception("match and step should not both be defined"); } + + final edit = ChoreoEdit.fromText( + originalText: stepText(), + editedText: text, + ); + choreoSteps.add( ChoreoRecordStep( - text: text, + edits: edit, acceptedOrIgnoredMatch: match, itStep: step, ), ); } + /// Get the text at [stepIndex] + String stepText({int? stepIndex}) { + stepIndex ??= choreoSteps.length - 1; + if (stepIndex >= choreoSteps.length) { + throw RangeError.index(stepIndex, choreoSteps, "index out of range"); + } + + if (stepIndex < 0) return originalText; + + String text = originalText; + for (int i = 0; i <= stepIndex; i++) { + final step = choreoSteps[i]; + if (step.edits == null) continue; + final edits = step.edits!; + + text = text.replaceRange( + edits.offset, + edits.offset + edits.length, + edits.insert, + ); + } + + return text; + } + bool get hasAcceptedMatches => choreoSteps.any( (element) => element.acceptedOrIgnoredMatch?.status == @@ -100,16 +221,8 @@ class ChoreoRecord { (step.acceptedOrIgnoredMatch?.isGrammarMatch ?? false); }); - static ChoreoRecord get newRecord => ChoreoRecord( - choreoSteps: [], - openMatches: [], - ); - List get itSteps => choreoSteps.where((e) => e.itStep != null).map((e) => e.itStep!).toList(); - - String get finalMessage => - choreoSteps.isNotEmpty ? choreoSteps.last.text : ""; } /// A new ChoreoRecordStep is saved in the following cases: @@ -134,8 +247,11 @@ class ChoreoRecord { /// the user chooses "hola" and a step is saved /// adds "amigo" and a step saved class ChoreoRecordStep { - /// text after changes have been made - String text; + /// Edits that, when applied to the previous step's text, + /// will provide the current step's text + /// Should always exist, except when using fromJSON + /// on old version of ChoreoRecordStep + ChoreoEdit? edits; /// all matches throughout edit process, /// including those open, accepted and ignored @@ -145,7 +261,7 @@ class ChoreoRecordStep { ITStep? itStep; ChoreoRecordStep({ - required this.text, + this.edits, this.acceptedOrIgnoredMatch, this.itStep, }) { @@ -158,7 +274,8 @@ class ChoreoRecordStep { factory ChoreoRecordStep.fromJson(Map json) { return ChoreoRecordStep( - text: json[_textKey], + edits: + json[_editKey] != null ? ChoreoEdit.fromJson(json[_editKey]) : null, acceptedOrIgnoredMatch: json[_acceptedOrIgnoredMatchKey] != null ? PangeaMatch.fromJson(json[_acceptedOrIgnoredMatchKey]) : null, @@ -166,13 +283,13 @@ class ChoreoRecordStep { ); } - static const _textKey = "txt"; + static const _editKey = "edits_v2"; static const _acceptedOrIgnoredMatchKey = "mtch"; static const _stepKey = "stp"; Map toJson() { final data = {}; - data[_textKey] = text; + data[_editKey] = edits?.toJson(); data[_acceptedOrIgnoredMatchKey] = acceptedOrIgnoredMatch?.toJson(); data[_stepKey] = itStep?.toJson(); return data; diff --git a/lib/pangea/choreographer/models/span_data.dart b/lib/pangea/choreographer/models/span_data.dart index c3bfc3169..fb26ff28d 100644 --- a/lib/pangea/choreographer/models/span_data.dart +++ b/lib/pangea/choreographer/models/span_data.dart @@ -65,6 +65,12 @@ class SpanData { 'type': type.toJson(), 'rule': rule?.toJson(), }; + + SpanChoice? get bestChoice { + return choices?.firstWhereOrNull( + (choice) => choice.isBestCorrection, + ); + } } class SpanChoice { diff --git a/lib/pangea/choreographer/widgets/igc/pangea_text_controller.dart b/lib/pangea/choreographer/widgets/igc/pangea_text_controller.dart index 93ec46876..edcaab52c 100644 --- a/lib/pangea/choreographer/widgets/igc/pangea_text_controller.dart +++ b/lib/pangea/choreographer/widgets/igc/pangea_text_controller.dart @@ -173,13 +173,13 @@ class PangeaTextController extends TextEditingController { return TextSpan(text: text, style: style); } - final choreoSteps = choreographer.choreoRecord.choreoSteps; + final choreoSteps = choreographer.choreoRecord?.choreoSteps; List inlineSpans = []; try { inlineSpans = choreographer.igc.igcTextData!.constructTokenSpan( - choreoSteps: choreoSteps.isNotEmpty && - choreoSteps.last.acceptedOrIgnoredMatch?.status == + choreoSteps: (choreoSteps?.isNotEmpty ?? false) && + choreoSteps!.last.acceptedOrIgnoredMatch?.status == PangeaMatchStatus.automatic ? choreoSteps : [], diff --git a/lib/pangea/events/event_wrappers/pangea_message_event.dart b/lib/pangea/events/event_wrappers/pangea_message_event.dart index 74aacf677..6e3eb16e9 100644 --- a/lib/pangea/events/event_wrappers/pangea_message_event.dart +++ b/lib/pangea/events/event_wrappers/pangea_message_event.dart @@ -381,6 +381,7 @@ class PangeaMessageEvent { if (_latestEdit.content[ModelKey.choreoRecord] == null) return null; return ChoreoRecord.fromJson( _latestEdit.content[ModelKey.choreoRecord] as Map, + originalWrittenContent, ); } catch (e, s) { ErrorHandler.logError( @@ -599,10 +600,8 @@ class PangeaMessageEvent { String? written = originalSent?.content.text; if (originalWritten != null && !originalWritten!.content.originalSent) { written = originalWritten!.text; - } else if (originalSent?.choreo != null && - originalSent!.choreo!.choreoSteps.isNotEmpty) { - final steps = originalSent!.choreo!.choreoSteps; - written = steps.first.text; + } else if (originalSent?.choreo != null) { + written = originalSent!.choreo!.originalText; } return written ?? body; diff --git a/test/pangea/choreo_record_test.dart b/test/pangea/choreo_record_test.dart new file mode 100644 index 000000000..0da9d0702 --- /dev/null +++ b/test/pangea/choreo_record_test.dart @@ -0,0 +1,123 @@ +import 'package:flutter_test/flutter_test.dart'; + +import 'package:fluffychat/pangea/choreographer/models/choreo_edit.dart'; +import 'package:fluffychat/pangea/choreographer/models/choreo_record.dart'; + +void main() async { + group("Optimized choreo record tests", () { + test("Test that choreo_edit parameters are accurately calculated", () { + const String originalText = "Parameter"; + const String editedText = "Perrimeter"; + + final ChoreoEdit edits = ChoreoEdit.fromText( + originalText: originalText, + editedText: editedText, + ); + + assert( + edits.offset == 1 && edits.length == 3 && edits.insert == "erri", + ); + }); + + test("Test that data saved via ChoreoEdit can be accurately retrieved", () { + const String originalText = "step"; + const String editedText = "steps"; + + final ChoreoEdit edits = ChoreoEdit.fromText( + originalText: originalText, + editedText: editedText, + ); + + final String retrieved = edits.editedText(originalText); + + assert( + retrieved == editedText, + ); + }); + + test("Test that addRecord and lastText work correctly", () { + final List steps = []; + + steps.add(""); + steps.add("Si"); + + final record = ChoreoRecord( + originalText: "Yes", + choreoSteps: [], + openMatches: [], + ); + + for (final step in steps) { + record.addRecord(step); + } + + assert( + record.choreoSteps[0].edits != null && + record.choreoSteps[1].edits != null && + record.stepText() == "Si", + ); + }); + + test("Test that fromJSON receives updated version correctly", () { + final List steps = []; + + steps.add(""); + steps.add("Si"); + + final record = ChoreoRecord( + originalText: "Yes", + choreoSteps: [], + openMatches: [], + ); + + for (final step in steps) { + record.addRecord(step); + } + + final json = record.toJson(); + final received = ChoreoRecord.fromJson(json); + + assert( + received.choreoSteps[0].edits != null && + received.choreoSteps[1].edits != null && + received.stepText() == "Si", + ); + }); + + test("Test that fromJSON converts old version correctly", () { + final List steps = []; + + steps.add(""); + steps.add("Dos"); + steps.add("Tres"); + steps.add(""); + steps.add("Cinco"); + steps.add("Cincai"); + + final record = ChoreoRecord( + originalText: "One", + choreoSteps: [], + openMatches: [], + ); + + for (final step in steps) { + record.addRecord(step); + } + + final json = record.toJson(); + final received = ChoreoRecord.fromJson(json); + + // Initial step and steps following empty strings + // will have text instead of edits + assert( + received.choreoSteps[0].edits != null && + received.choreoSteps[1].edits != null && + received.choreoSteps[2].edits != null && + received.choreoSteps[3].edits != null && + received.choreoSteps[4].edits != null && + received.choreoSteps[5].edits != null && + received.stepText() == "Cincai", + ); + }); + }); +}