using getIGCTextData to fetch tokens and languages if not present

This commit is contained in:
William Jordan-Cooley 2024-06-30 12:08:30 -04:00
parent 919cfc4bd3
commit 2526331706
7 changed files with 103 additions and 139 deletions

View file

@ -117,34 +117,39 @@ class Choreographer {
// TODO - move this to somewhere such that the message can be cleared from the input field
// before the language detection is complete. Otherwise, user is going to be waiting
// in cases of slow internet or slow language detection
final String originalSentLangCode = langCodeOfCurrentText ??
(await pangeaController.languageDetection.detectLanguage(
currentText,
pangeaController.languageController.userL2?.langCode,
pangeaController.languageController.userL1?.langCode,
))
.bestDetection()
.langCode;
final PangeaRepresentation originalSent = PangeaRepresentation(
langCode: originalSentLangCode,
text: currentText,
originalSent: true,
originalWritten: originalWritten == null,
);
// TODO - why do both IT and IGC need to be enabled for choreo to be applicable?
final ChoreoRecord? applicableChoreo =
    isITandIGCEnabled && igc.igcTextData != null ? choreoRecord : null;

// The use type is computed BEFORE the fetch below on purpose: it should
// reflect whether correction had actually been run on the message, and
// `igc.igcTextData` may only become non-null because of that fetch.
final UseType useType = useTypeCalculator(applicableChoreo);
debugPrint("use type in choreographer $useType");

// If tokens or language detection are not available, get them now.
// Note that we probably need to move this to after we clear the input field,
// or the user could experience some lag here. We may eventually want to
// edit the useType after this fetch completes.
if (igc.igcTextData?.tokens == null ||
    igc.igcTextData?.detectedLanguage == null) {
  await igc.getIGCTextData(onlyTokensAndLanguageDetection: true);
}

// Read the detected language only AFTER the fetch above. Reading it earlier
// captured a stale null whenever detection had not been run yet, so the
// message was sent as "unknown" even though the fetch had just been made.
// NOTE(review): presumes getIGCTextData populates igc.igcTextData - confirm.
final String? originalSentLangCode = igc.igcTextData?.detectedLanguage;

final PangeaRepresentation originalSent = PangeaRepresentation(
  // Fall back to the unknown-language key if detection is still unavailable.
  langCode: originalSentLangCode ?? LanguageKeys.unknownLanguage,
  text: currentText,
  originalSent: true,
  originalWritten: originalWritten == null,
);
debugger(when: kDebugMode);
chatController.send(
// PTODO - turn this back on in conjunction with saving tokens
// we need to save those tokens as well, in order for exchanges to work
// properly. in an exchange, the other user will want
// originalWritten: originalWritten,
originalSent: originalSent,
tokensSent: igc.igcTextData?.tokens != null
? PangeaMessageTokens(tokens: igc.igcTextData!.tokens)
@ -170,7 +175,7 @@ class Choreographer {
}
choreoMode = ChoreoMode.it;
itController.initializeIT(
ITStartData(_textController.text, igc.detectedLangCode),
ITStartData(_textController.text, igc.igcTextData?.detectedLanguage),
);
itMatch.status = PangeaMatchStatus.accepted;
@ -195,7 +200,7 @@ class Choreographer {
// this may be unnecessary now that tokens are not used
// to allow click of words in the input field and we're getting this at the end
// TODO - turn it off and test that this is fine
igc.justGetTokensAndAddThemToIGCTextData();
// igc.justGetTokensAndAddThemToIGCTextData();
// we set editType to keyboard here because that is the default for it
// and we want to make sure that the next change is treated as a keyboard change
@ -499,22 +504,6 @@ class Choreographer {
bool get editTypeIsKeyboard => EditType.keyboard == _textController.editType;
/// The language code of the text currently in the input field, if known.
///
/// Returns the IGC-detected language code when [igc] reports one.
/// Otherwise, if the IT controller has completed steps and all of them were
/// correct, returns the user's L2 language code. This second piece assumes
/// that IT is being used to translate into the user's L2 and could be
/// spotty. It's a bit of a hack, and should be tested more.
///
/// Returns `null` when neither source yields a language code, so callers can
/// fall back to their own detection.
String? get langCodeOfCurrentText {
  // Read the getter once: getters are not promoted by a null check, so the
  // original needed a second call plus a `!` to return the value.
  final String? detected = igc.detectedLangCode;
  if (detected != null) return detected;

  // TODO - this is a bit of a hack, and should be tested more
  // we should also check that user has not done customInput
  if (itController.completedITSteps.isNotEmpty && itController.allCorrect) {
    // The return type is nullable, so pass a missing L2 through as null
    // instead of force-unwrapping it and throwing.
    return l2LangCode;
  }

  return null;
}
setState() {
if (!stateListener.isClosed) {
stateListener.add(0);

View file

@ -10,11 +10,8 @@ import 'package:fluffychat/pangea/repo/igc_repo.dart';
import 'package:fluffychat/pangea/widgets/igc/span_card.dart';
import 'package:flutter/foundation.dart';
import 'package:flutter/material.dart';
import 'package:sentry_flutter/sentry_flutter.dart';
import '../../models/language_detection_model.dart';
import '../../models/span_card_model.dart';
import '../../repo/tokens_repo.dart';
import '../../utils/error_handler.dart';
import '../../utils/overlay.dart';
@ -64,22 +61,6 @@ class IgcController {
return;
}
//TO-DO: in api call, specify turning off IT and/or grammar checking
// UPDATE: This is now done in the API call. New TODO is to test this.
// if (!choreographer.igcEnabled) {
// igcTextDataResponse.matches = igcTextDataResponse.matches
// .where((match) => !match.isGrammarMatch)
// .toList();
// }
// if (!choreographer.itEnabled) {
// igcTextDataResponse.matches = igcTextDataResponse.matches
// .where((match) => !match.isOutOfTargetMatch)
// .toList();
// }
// if (!choreographer.itEnabled && !choreographer.igcEnabled) {
// igcTextDataResponse.matches = [];
// }
igcTextData = igcTextDataResponse;
// TODO - for each new match,
@ -106,61 +87,61 @@ class IgcController {
}
}
Future<void> justGetTokensAndAddThemToIGCTextData() async {
try {
if (igcTextData == null) {
debugger(when: kDebugMode);
choreographer.getLanguageHelp();
return;
}
igcTextData!.loading = true;
choreographer.startLoading();
if (igcTextData!.originalInput != choreographer.textController.text) {
debugger(when: kDebugMode);
ErrorHandler.logError(
m: "igcTextData fullText does not match current text",
s: StackTrace.current,
data: igcTextData!.toJson(),
);
}
// Future<void> justGetTokensAndAddThemToIGCTextData() async {
// try {
// if (igcTextData == null) {
// debugger(when: kDebugMode);
// choreographer.getLanguageHelp();
// return;
// }
// igcTextData!.loading = true;
// choreographer.startLoading();
// if (igcTextData!.originalInput != choreographer.textController.text) {
// debugger(when: kDebugMode);
// ErrorHandler.logError(
// m: "igcTextData fullText does not match current text",
// s: StackTrace.current,
// data: igcTextData!.toJson(),
// );
// }
if (choreographer.l1LangCode == null ||
choreographer.l2LangCode == null) {
debugger(when: kDebugMode);
ErrorHandler.logError(
m: "l1LangCode and/or l2LangCode is null",
s: StackTrace.current,
data: {
"l1LangCode": choreographer.l1LangCode,
"l2LangCode": choreographer.l2LangCode,
},
);
return;
}
// if (choreographer.l1LangCode == null ||
// choreographer.l2LangCode == null) {
// debugger(when: kDebugMode);
// ErrorHandler.logError(
// m: "l1LangCode and/or l2LangCode is null",
// s: StackTrace.current,
// data: {
// "l1LangCode": choreographer.l1LangCode,
// "l2LangCode": choreographer.l2LangCode,
// },
// );
// return;
// }
final TokensResponseModel res = await TokensRepo.tokenize(
await choreographer.pangeaController.userController.accessToken,
TokensRequestModel(
fullText: igcTextData!.originalInput,
userL1: choreographer.l1LangCode!,
userL2: choreographer.l2LangCode!,
),
);
igcTextData?.tokens = res.tokens;
} catch (err, stack) {
debugger(when: kDebugMode);
choreographer.errorService.setError(
ChoreoError(type: ChoreoErrorType.unknown, raw: err),
);
Sentry.addBreadcrumb(
Breadcrumb.fromJson({"igctextDdata": igcTextData?.toJson()}),
);
ErrorHandler.logError(e: err, s: stack);
} finally {
igcTextData?.loading = false;
choreographer.stopLoading();
}
}
// final TokensResponseModel res = await TokensRepo.tokenize(
// await choreographer.pangeaController.userController.accessToken,
// TokensRequestModel(
// fullText: igcTextData!.originalInput,
// userL1: choreographer.l1LangCode!,
// userL2: choreographer.l2LangCode!,
// ),
// );
// igcTextData?.tokens = res.tokens;
// } catch (err, stack) {
// debugger(when: kDebugMode);
// choreographer.errorService.setError(
// ChoreoError(type: ChoreoErrorType.unknown, raw: err),
// );
// Sentry.addBreadcrumb(
// Breadcrumb.fromJson({"igctextDdata": igcTextData?.toJson()}),
// );
// ErrorHandler.logError(e: err, s: stack);
// } finally {
// igcTextData?.loading = false;
// choreographer.stopLoading();
// }
// }
void showFirstMatch(BuildContext context) {
if (igcTextData == null || igcTextData!.matches.isEmpty) {
@ -218,14 +199,6 @@ class IgcController {
return true;
}
/// The language code of the first entry in the current IGC detections.
///
/// Returns `null` when [hasRelevantIGCTextData] is false or when the
/// detections collection is empty.
String? get detectedLangCode {
  if (!hasRelevantIGCTextData) return null;

  final detections = igcTextData!.detections;
  // `first` throws a StateError on an empty collection; this getter is
  // nullable, so report "no detection" instead of crashing.
  // NOTE(review): hasRelevantIGCTextData may already guarantee a non-empty
  // list - confirm; the guard is harmless either way.
  if (detections.isEmpty) return null;

  return detections.first.langCode;
}
clear() {
igcTextData = null;
spanDataController.clearCache();

View file

@ -72,8 +72,6 @@ class ITController {
/// if IGC isn't positive that text is full L1 then translate to L1
Future<void> _setSourceText() async {
debugger(when: kDebugMode);
// try {
if (_itStartData == null || _itStartData!.text.isEmpty) {
Sentry.addBreadcrumb(
Breadcrumb(
@ -98,21 +96,12 @@ class ITController {
request: FullTextTranslationRequestModel(
text: _itStartData!.text,
tgtLang: choreographer.l1LangCode!,
srcLang: choreographer.l2LangCode,
srcLang: _itStartData!.langCode,
userL1: choreographer.l1LangCode!,
userL2: choreographer.l2LangCode!,
),
);
sourceText = res.bestTranslation;
// } catch (err, stack) {
// debugger(when: kDebugMode);
// if (_itStartData?.text.isNotEmpty ?? false) {
// ErrorHandler.logError(e: err, s: stack);
// sourceText = _itStartData!.text;
// } else {
// rethrow;
// }
// }
}
// used 1) at very beginning (with custom input = null)

View file

@ -309,7 +309,10 @@ class MyAnalyticsController {
recentConstructUses.addAll(constructLists.expand((e) => e));
//TODO - confirm that this is the correct construct content
debugger(when: kDebugMode && recentConstructUses.isNotEmpty);
debugger(
when: kDebugMode &&
(recentPangeaMessageEvents.isNotEmpty ||
recentActivityRecords.isNotEmpty));
await analyticsRoom.sendConstructsEvent(
recentConstructUses,

View file

@ -666,7 +666,6 @@ class PangeaMessageEvent {
/// get construct uses of type vocab for the message
List<OneConstructUse> get _vocabUses {
debugger();
final List<OneConstructUse> uses = [];
// missing vital info so return. should not happen
@ -739,7 +738,6 @@ class PangeaMessageEvent {
/// it is considered to be a [ConstructUseTypeEnum.corIt].
/// If the [token] is not included in any choreoStep, it is considered to be a [ConstructUseTypeEnum.wa].
List<OneConstructUse> _getVocabUseForToken(PangeaToken token) {
debugger();
if (originalSent?.choreo == null) {
final bool inUserL2 = originalSent?.langCode == l2Code;
return _lemmasToVocabUses(

View file

@ -1,5 +1,6 @@
import 'dart:developer';
import 'package:fluffychat/pangea/controllers/language_detection_controller.dart';
import 'package:fluffychat/pangea/models/pangea_match_model.dart';
import 'package:fluffychat/pangea/models/pangea_token_model.dart';
import 'package:fluffychat/pangea/models/span_card_model.dart';
@ -13,12 +14,11 @@ import 'package:matrix/matrix.dart';
import 'package:sentry_flutter/sentry_flutter.dart';
import '../constants/model_keys.dart';
import 'language_detection_model.dart';
// import 'package:language_tool/language_tool.dart';
class IGCTextData {
List<LanguageDetection> detections;
LanguageDetectionResponse detections;
String originalInput;
String? fullTextCorrection;
List<PangeaToken> tokens;
@ -42,6 +42,17 @@ class IGCTextData {
});
factory IGCTextData.fromJson(Map<String, dynamic> json) {
// changing this to allow for use of the LanguageDetectionResponse methods
// TODO - change API after we're sure all clients are updated. not urgent.
final LanguageDetectionResponse detections =
json[_detectionsKey] is Iterable
? LanguageDetectionResponse.fromJson({
"detections": json[_detectionsKey],
"full_text": json["original_input"],
})
: LanguageDetectionResponse.fromJson(
json[_detectionsKey] as Map<String, dynamic>);
return IGCTextData(
tokens: (json[_tokensKey] as Iterable)
.map<PangeaToken>(
@ -59,12 +70,7 @@ class IGCTextData {
.toList()
.cast<PangeaMatch>()
: [],
detections: (json[_detectionsKey] as Iterable)
.map<LanguageDetection>(
(e) => LanguageDetection.fromJson(e as Map<String, dynamic>),
)
.toList()
.cast<LanguageDetection>(),
detections: detections,
originalInput: json["original_input"],
fullTextCorrection: json["full_text_correction"],
userL1: json[ModelKey.userL1],
@ -79,7 +85,7 @@ class IGCTextData {
static const String _detectionsKey = "detections";
Map<String, dynamic> toJson() => {
_detectionsKey: detections.map((e) => e.toJson()).toList(),
_detectionsKey: detections.toJson(),
"original_input": originalInput,
"full_text_correction": fullTextCorrection,
_tokensKey: tokens.map((e) => e.toJson()).toList(),
@ -90,6 +96,8 @@ class IGCTextData {
"enable_igc": enableIGC,
};
/// The language code of the best detection reported by [detections].
String get detectedLanguage {
  final bestMatch = detections.bestDetection();
  return bestMatch.langCode;
}
// reconstruct fullText based on accepted match
//update offsets in existing matches to reflect the change
//if existing matches overlap with the accepted one, remove them??

View file

@ -1,6 +1,7 @@
import 'dart:convert';
import 'package:fluffychat/pangea/config/environment.dart';
import 'package:fluffychat/pangea/controllers/language_detection_controller.dart';
import 'package:fluffychat/pangea/models/language_detection_model.dart';
import 'package:fluffychat/pangea/models/lemma.dart';
import 'package:fluffychat/pangea/models/pangea_match_model.dart';
@ -39,7 +40,10 @@ class IgcRepo {
await Future.delayed(const Duration(seconds: 2));
final IGCTextData igcTextData = IGCTextData(
detections: [LanguageDetection(langCode: "en", confidence: 0.99)],
detections: LanguageDetectionResponse(
detections: [LanguageDetection(langCode: "en", confidence: 0.99)],
fullText: "This be a sample text",
),
tokens: [
PangeaToken(
text: PangeaTokenText(content: "This", offset: 0, length: 4),