From d02acc988495f0e983bc7c72478018536e016cd1 Mon Sep 17 00:00:00 2001 From: William Jordan-Cooley Date: Wed, 3 Jul 2024 18:07:33 -0400 Subject: [PATCH] switching to token focus of saving constructs --- .../pangea_message_event.dart | 74 ++++++++++--------- lib/pangea/models/it_response_model.dart | 28 ++++--- lib/pangea/models/lemma.dart | 15 +--- lib/pangea/models/pangea_token_model.dart | 69 ++++++++--------- lib/pangea/repo/igc_repo.dart | 20 +++-- 5 files changed, 103 insertions(+), 103 deletions(-) diff --git a/lib/pangea/matrix_event_wrappers/pangea_message_event.dart b/lib/pangea/matrix_event_wrappers/pangea_message_event.dart index 334c0fa78..0dfaa149a 100644 --- a/lib/pangea/matrix_event_wrappers/pangea_message_event.dart +++ b/lib/pangea/matrix_event_wrappers/pangea_message_event.dart @@ -695,21 +695,25 @@ class PangeaMessageEvent { if (continuance.wasClicked) { //PTODO - account for end of flow score if (continuance.level != ChoreoConstants.levelThresholdForGreen) { - uses.addAll( - _lemmasToVocabUses( - continuance.lemmas, - ConstructUseTypeEnum.incIt, - ), - ); + for (final token in continuance.tokens) { + uses.add( + _lemmaToVocabUse( + token.lemma, + ConstructUseTypeEnum.incIt, + ), + ); + } } } else { if (continuance.level != ChoreoConstants.levelThresholdForGreen) { - uses.addAll( - _lemmasToVocabUses( - continuance.lemmas, - ConstructUseTypeEnum.ignIt, - ), - ); + for (final token in continuance.tokens) { + uses.add( + _lemmaToVocabUse( + token.lemma, + ConstructUseTypeEnum.ignIt, + ), + ); + } } } } @@ -729,13 +733,13 @@ class PangeaMessageEvent { // for each token, record whether selected in ga, ta, or wa for (final token in originalSent!.tokens!) { - uses.addAll(_getVocabUseForToken(token)); + uses.add(_getVocabUseForToken(token)); } return uses; } - /// Returns a list of [OneConstructUse] objects for the given [token] + /// Returns a [OneConstructUse] for the given [token] /// If there is no [originalSent] or [originalSent.choreo], the [token] is /// considered to be a [ConstructUseTypeEnum.wa] as long as it matches the target language. /// Later on, we may want to consider putting it in some category of like 'pending' @@ -744,11 +748,11 @@ class PangeaMessageEvent { /// If the [token] is in the [originalSent.choreo.acceptedOrIgnoredMatch.choices], /// it is considered to be a [ConstructUseTypeEnum.corIt]. /// If the [token] is not included in any choreoStep, it is considered to be a [ConstructUseTypeEnum.wa]. - List _getVocabUseForToken(PangeaToken token) { + OneConstructUse _getVocabUseForToken(PangeaToken token) { if (originalSent?.choreo == null) { final bool inUserL2 = originalSent?.langCode == l2Code; - return _lemmasToVocabUses( - token.lemmas, + return _lemmaToVocabUse( + token.lemma, inUserL2 ? ConstructUseTypeEnum.wa : ConstructUseTypeEnum.unk, ); } @@ -763,42 +767,40 @@ class PangeaMessageEvent { step.text.contains(r.value), ) ?? false)) { - return _lemmasToVocabUses(token.lemmas, ConstructUseTypeEnum.ga); + return _lemmaToVocabUse(token.lemma, ConstructUseTypeEnum.ga); } if (step.itStep != null) { final bool pickedThroughIT = step.itStep!.chosenContinuance?.text.contains(token.text.content) ?? false; if (pickedThroughIT) { - return _lemmasToVocabUses(token.lemmas, ConstructUseTypeEnum.corIt); + return _lemmaToVocabUse(token.lemma, ConstructUseTypeEnum.corIt); //PTODO - check if added via custom input in IT flow } } } - return _lemmasToVocabUses(token.lemmas, ConstructUseTypeEnum.wa); + return _lemmaToVocabUse(token.lemma, ConstructUseTypeEnum.wa); } /// Convert a list of [lemmas] into a list of vocab uses /// with the given [type] - List _lemmasToVocabUses( - List lemmas, + OneConstructUse _lemmaToVocabUse( + Lemma lemma, ConstructUseTypeEnum type, ) { final List uses = []; - for (final lemma in lemmas) { - if (lemma.saveVocab) { - uses.add( - OneConstructUse( - useType: type, - chatId: event.roomId!, - timeStamp: event.originServerTs, - lemma: lemma.text, - form: lemma.form, - msgId: event.eventId, - constructType: ConstructTypeEnum.vocab, - ), - ); - } + if (lemma.saveVocab) { + uses.add( + OneConstructUse( + useType: type, + chatId: event.roomId!, + timeStamp: event.originServerTs, + lemma: lemma.text, + form: lemma.form, + msgId: event.eventId, + constructType: ConstructTypeEnum.vocab, + ), + ); } return uses; } diff --git a/lib/pangea/models/it_response_model.dart b/lib/pangea/models/it_response_model.dart index 5fda36020..6adb4b4bc 100644 --- a/lib/pangea/models/it_response_model.dart +++ b/lib/pangea/models/it_response_model.dart @@ -2,11 +2,10 @@ import 'package:collection/collection.dart'; import 'package:fluffychat/pangea/constants/choreo_constants.dart'; import 'package:fluffychat/pangea/constants/model_keys.dart'; import 'package:fluffychat/pangea/extensions/my_list_extension.dart'; +import 'package:fluffychat/pangea/models/pangea_token_model.dart'; import 'package:flutter/material.dart'; import 'package:flutter_gen/gen_l10n/l10n.dart'; -import 'lemma.dart'; - class ITResponseModel { String fullTextTranslation; List continuances; @@ -79,7 +78,7 @@ class Continuance { double probability; int level; String text; - List lemmas; + List tokens; /// saving this in a full json form String description; @@ -99,19 +98,18 @@ class Continuance { required this.inDictionary, required this.hasInfo, required this.gold, - required this.lemmas, + required this.tokens, }); factory Continuance.fromJson(Map json) { - final List lemmaInternal = - (json[ModelKey.lemma] != null && json[ModelKey.lemma] is Iterable) - ? (json[ModelKey.lemma] as Iterable) - .map( - (e) => Lemma.fromJson(e as Map), - ) - .toList() - .cast() - : []; + final List tokensInternal = (json[ModelKey.tokens] != null) + ? (json[ModelKey.tokens] as Iterable) + .map( + (e) => PangeaToken.fromJson(e as Map), + ) + .toList() + .cast() + : []; return Continuance( probability: json['probability'].toDouble(), level: json['level'], @@ -122,7 +120,7 @@ class Continuance { wasClicked: json['clkd'] ?? false, hasInfo: json['has_info'] ?? false, gold: json['gold'] ?? false, - lemmas: lemmaInternal, + tokens: tokensInternal, ); } @@ -132,7 +130,7 @@ class Continuance { data['level'] = level; data['text'] = text; data['clkd'] = wasClicked; - data[ModelKey.lemma] = lemmas.map((e) => e.toJson()).toList(); + data[ModelKey.tokens] = tokens.map((e) => e.toJson()).toList(); if (!condensed) { data['description'] = description; diff --git a/lib/pangea/models/lemma.dart b/lib/pangea/models/lemma.dart index 017a7ab88..1dc44c4b5 100644 --- a/lib/pangea/models/lemma.dart +++ b/lib/pangea/models/lemma.dart @@ -8,22 +8,13 @@ class Lemma { /// [saveVocab] true - whether to save the lemma to the user's vocabulary /// vocab that are not saved: emails, urls, numbers, punctuation, etc. + /// server handles this determination final bool saveVocab; - /// [pos] ex "v" - part of speech of the lemma - /// https://universaldependencies.org/u/pos/ - final String pos; - - /// [morph] ex {} - morphological features of the lemma - /// https://universaldependencies.org/u/feat/ - final Map morph; - Lemma({ required this.text, required this.saveVocab, required this.form, - this.pos = '', - this.morph = const {}, }); factory Lemma.fromJson(Map json) { @@ -31,8 +22,6 @@ class Lemma { text: json['text'], saveVocab: json['save_vocab'] ?? json['saveVocab'] ?? false, form: json["form"] ?? json['text'], - pos: json['pos'] ?? '', - morph: json['morph'] ?? '{}', ); } @@ -41,8 +30,6 @@ class Lemma { 'text': text, 'save_vocab': saveVocab, 'form': form, - 'pos': pos, - 'morph': morph, }; } diff --git a/lib/pangea/models/pangea_token_model.dart b/lib/pangea/models/pangea_token_model.dart index 9dddd149b..7055c29fa 100644 --- a/lib/pangea/models/pangea_token_model.dart +++ b/lib/pangea/models/pangea_token_model.dart @@ -1,55 +1,58 @@ import 'dart:developer'; import 'package:flutter/foundation.dart'; -import 'package:sentry_flutter/sentry_flutter.dart'; import '../constants/model_keys.dart'; -import '../utils/error_handler.dart'; import 'lemma.dart'; class PangeaToken { PangeaTokenText text; - List lemmas; + Lemma lemma; + + /// [pos] ex "VERB" - part of speech of the token + /// https://universaldependencies.org/u/pos/ + final String pos; + + /// [morph] ex {} - morphological features of the token + /// https://universaldependencies.org/u/feat/ + final Map morph; PangeaToken({ required this.text, - required this.lemmas, + required this.lemma, + required this.pos, + required this.morph, }); - static getLemmas(String text, Iterable? json) { + static _getLemmas(String text, dynamic json) { if (json != null) { - return json - .map( - (e) => Lemma.fromJson(e as Map), - ) - .toList() - .cast(); + // July 24, 2024 - we're changing from a list to a single lemma and this is for backwards compatibility + // previously sent tokens have lists of lemmas + if (json is Iterable) { + return json + .map( + (e) => Lemma.fromJson(e as Map), + ) + .toList() + .cast(); + } else { + return Lemma.fromJson(json); + } } else { - return [Lemma(text: text, saveVocab: false, form: text)]; + // earlier still, we didn't have lemmas so this is for really old tokens + return Lemma(text: text, saveVocab: false, form: text); } } factory PangeaToken.fromJson(Map json) { - try { - final PangeaTokenText text = - PangeaTokenText.fromJson(json[_textKey] as Map); - return PangeaToken( - text: text, - lemmas: getLemmas(text.content, json[_lemmaKey]), - ); - } catch (err, s) { - debugger(when: kDebugMode); - Sentry.addBreadcrumb( - Breadcrumb( - message: "PangeaToken.fromJson error", - data: { - "json": json, - }, - ), - ); - ErrorHandler.logError(e: err, s: s); - rethrow; - } + final PangeaTokenText text = + PangeaTokenText.fromJson(json[_textKey] as Map); + return PangeaToken( + text: text, + lemma: _getLemmas(text.content, json[_lemmaKey]), + pos: json['pos'] ?? '', + morph: json['morph'] ?? '{}', + ); } static const String _textKey = "text"; @@ -57,7 +60,7 @@ class PangeaToken { Map toJson() => { _textKey: text.toJson(), - _lemmaKey: lemmas.map((e) => e.toJson()).toList(), + _lemmaKey: lemma.toJson(), }; int get end => text.offset + text.length; diff --git a/lib/pangea/repo/igc_repo.dart b/lib/pangea/repo/igc_repo.dart index d6271470a..e32f6cab7 100644 --- a/lib/pangea/repo/igc_repo.dart +++ b/lib/pangea/repo/igc_repo.dart @@ -47,23 +47,33 @@ class IgcRepo { tokens: [ PangeaToken( text: PangeaTokenText(content: "This", offset: 0, length: 4), - lemmas: [Lemma(form: "This", text: "this", saveVocab: true)], + lemma: Lemma(form: "This", text: "this", saveVocab: true), + pos: "DET", + morph: {}, ), PangeaToken( text: PangeaTokenText(content: "be", offset: 5, length: 2), - lemmas: [Lemma(form: "be", text: "be", saveVocab: true)], + lemma: Lemma(form: "be", text: "be", saveVocab: true), + pos: "VERB", + morph: {}, ), PangeaToken( text: PangeaTokenText(content: "a", offset: 8, length: 1), - lemmas: [], + lemma: Lemma(form: "a", text: "a", saveVocab: true), + pos: "DET", + morph: {}, ), PangeaToken( text: PangeaTokenText(content: "sample", offset: 10, length: 6), - lemmas: [], + lemma: Lemma(form: "sample", text: "sample", saveVocab: true), + pos: "NOUN", + morph: {}, ), PangeaToken( text: PangeaTokenText(content: "text", offset: 17, length: 4), - lemmas: [], + lemma: Lemma(form: "text", text: "text", saveVocab: true), + pos: "NOUN", + morph: {}, ), ], matches: [