using user languages in tokenization and language detection
This commit is contained in:
parent
7b60190614
commit
dd29817e08
5 changed files with 130 additions and 125 deletions
|
|
@ -14,26 +14,26 @@ class LanguageDetectionRequest {
|
|||
/// The full text from which to detect the language.
|
||||
String fullText;
|
||||
|
||||
/// The base language of the user, if known. Including this is much preferred
|
||||
/// The base language of the user that sent the message, if known. Including this is much preferred
|
||||
/// and should return better results; however, it is not absolutely necessary.
|
||||
/// This property is nullable to allow for situations where the languages are not set
|
||||
/// at the time of the request.
|
||||
String? userL1;
|
||||
String? senderL1;
|
||||
|
||||
/// The target language of the user. This is expected to be set for the request
|
||||
/// The target language of the user that sent the message. This is expected to be set for the request
|
||||
/// but is nullable to handle edge cases where it might not be.
|
||||
String? userL2;
|
||||
String? senderL2;
|
||||
|
||||
LanguageDetectionRequest({
|
||||
required this.fullText,
|
||||
this.userL1 = "",
|
||||
required this.userL2,
|
||||
required this.senderL1,
|
||||
required this.senderL2,
|
||||
});
|
||||
|
||||
Map<String, dynamic> toJson() => {
|
||||
'full_text': fullText,
|
||||
'user_l1': userL1,
|
||||
'user_l2': userL2,
|
||||
'sender_l1': senderL1,
|
||||
'sender_l2': senderL2,
|
||||
};
|
||||
|
||||
@override
|
||||
|
|
@ -41,12 +41,12 @@ class LanguageDetectionRequest {
|
|||
if (identical(this, other)) return true;
|
||||
return other is LanguageDetectionRequest &&
|
||||
other.fullText == fullText &&
|
||||
other.userL1 == userL1 &&
|
||||
other.userL2 == userL2;
|
||||
other.senderL1 == senderL1 &&
|
||||
other.senderL2 == senderL2;
|
||||
}
|
||||
|
||||
@override
|
||||
int get hashCode => fullText.hashCode ^ userL1.hashCode ^ userL2.hashCode;
|
||||
int get hashCode => fullText.hashCode ^ senderL1.hashCode ^ senderL2.hashCode;
|
||||
}
|
||||
|
||||
class LanguageDetectionResponse {
|
||||
|
|
@ -125,19 +125,6 @@ class LanguageDetectionController {
|
|||
_cacheClearTimer?.cancel();
|
||||
}
|
||||
|
||||
Future<LanguageDetectionResponse> detectLanguage(
|
||||
String fullText,
|
||||
String? userL2,
|
||||
String? userL1,
|
||||
) async {
|
||||
final LanguageDetectionRequest params = LanguageDetectionRequest(
|
||||
fullText: fullText,
|
||||
userL1: userL1,
|
||||
userL2: userL2,
|
||||
);
|
||||
return get(params);
|
||||
}
|
||||
|
||||
Future<LanguageDetectionResponse> get(
|
||||
LanguageDetectionRequest params,
|
||||
) async {
|
||||
|
|
|
|||
|
|
@ -1,14 +1,19 @@
|
|||
import 'dart:async';
|
||||
import 'dart:convert';
|
||||
|
||||
import 'package:fluffychat/pangea/config/environment.dart';
|
||||
import 'package:fluffychat/pangea/controllers/base_controller.dart';
|
||||
import 'package:fluffychat/pangea/controllers/pangea_controller.dart';
|
||||
import 'package:fluffychat/pangea/extensions/pangea_room_extension/pangea_room_extension.dart';
|
||||
import 'package:fluffychat/pangea/models/pangea_token_model.dart';
|
||||
import 'package:fluffychat/pangea/models/representation_content_model.dart';
|
||||
import 'package:fluffychat/pangea/models/token_api_models.dart';
|
||||
import 'package:fluffychat/pangea/models/tokens_event_content_model.dart';
|
||||
import 'package:fluffychat/pangea/repo/tokens_repo.dart';
|
||||
import 'package:fluffychat/pangea/network/requests.dart';
|
||||
import 'package:fluffychat/pangea/network/urls.dart';
|
||||
import 'package:flutter/foundation.dart';
|
||||
import 'package:flutter/material.dart';
|
||||
import 'package:http/http.dart';
|
||||
import 'package:matrix/matrix.dart';
|
||||
|
||||
import '../constants/pangea_event_types.dart';
|
||||
|
|
@ -49,6 +54,38 @@ class MessageDataController extends BaseController {
|
|||
super.dispose();
|
||||
}
|
||||
|
||||
/// Posts [request] to the tokenize endpoint and parses the reply.
///
/// [accessToken] is passed to [Requests] together with the choreo API key
/// from [Environment]. The response bytes are decoded as UTF-8 JSON and
/// turned into a [TokensResponseModel].
///
/// An empty token list is reported through [ErrorHandler.logError], but the
/// parsed response is still returned to the caller.
static Future<TokensResponseModel> _fetchTokens(
  String accessToken,
  TokensRequestModel request,
) async {
  final Response httpResponse = await Requests(
    choreoApiKey: Environment.choreoApiKey,
    accessToken: accessToken,
  ).post(
    url: PApiUrls.tokenize,
    body: request.toJson(),
  );

  // Decode from the raw bytes so the payload is always read as UTF-8.
  final String rawBody = utf8.decode(httpResponse.bodyBytes);
  final TokensResponseModel parsed = TokensResponseModel.fromJson(
    jsonDecode(rawBody),
  );

  if (parsed.tokens.isNotEmpty) return parsed;

  // Nothing was tokenized: log it, then hand the empty result back anyway.
  ErrorHandler.logError(
    e: Exception(
      "empty tokens in tokenize response return",
    ),
  );
  return parsed;
}
|
||||
|
||||
/// get tokens from the server
|
||||
/// if repEventId is not null, send the tokens to the room
|
||||
Future<List<PangeaToken>> _getTokens({
|
||||
|
|
@ -56,7 +93,7 @@ class MessageDataController extends BaseController {
|
|||
required TokensRequestModel req,
|
||||
required Room? room,
|
||||
}) async {
|
||||
final TokensResponseModel res = await TokensRepo.tokenize(
|
||||
final TokensResponseModel res = await _fetchTokens(
|
||||
_pangeaController.userController.accessToken,
|
||||
req,
|
||||
);
|
||||
|
|
|
|||
|
|
@ -3,8 +3,8 @@ import 'dart:developer';
|
|||
import 'package:fluffychat/pangea/extensions/pangea_event_extension.dart';
|
||||
import 'package:fluffychat/pangea/matrix_event_wrappers/pangea_choreo_event.dart';
|
||||
import 'package:fluffychat/pangea/models/pangea_token_model.dart';
|
||||
import 'package:fluffychat/pangea/models/token_api_models.dart';
|
||||
import 'package:fluffychat/pangea/models/tokens_event_content_model.dart';
|
||||
import 'package:fluffychat/pangea/repo/tokens_repo.dart';
|
||||
import 'package:flutter/foundation.dart';
|
||||
import 'package:flutter/material.dart';
|
||||
import 'package:matrix/matrix.dart';
|
||||
|
|
@ -135,13 +135,17 @@ class RepresentationEvent {
|
|||
await MatrixState.pangeaController.messageData.getTokens(
|
||||
repEventId: _event?.eventId,
|
||||
room: _event?.room ?? parentMessageEvent.room,
|
||||
// Jordan - for just tokens, it's not clear which languages to pass
|
||||
req: TokensRequestModel(
|
||||
fullText: text,
|
||||
userL1:
|
||||
langCode: langCode,
|
||||
senderL1:
|
||||
MatrixState.pangeaController.languageController.userL1?.langCode ??
|
||||
LanguageKeys.unknownLanguage,
|
||||
userL2: langCode,
|
||||
// since langCode is known, senderL2 will be used to determine whether these tokens
|
||||
// need pos/morph tags and whether lemmas are eligible to be marked as "save_vocab=true"
|
||||
senderL2:
|
||||
MatrixState.pangeaController.languageController.userL2?.langCode ??
|
||||
LanguageKeys.unknownLanguage,
|
||||
),
|
||||
);
|
||||
|
||||
|
|
|
|||
72
lib/pangea/models/token_api_models.dart
Normal file
72
lib/pangea/models/token_api_models.dart
Normal file
|
|
@ -0,0 +1,72 @@
|
|||
import 'package:fluffychat/pangea/constants/model_keys.dart';
|
||||
|
||||
import 'pangea_token_model.dart';
|
||||
|
||||
/// Request payload sent to the tokenize endpoint.
class TokensRequestModel {
  /// The text to be tokenized.
  String fullText;

  /// If known, [langCode] is the language of the text.
  ///
  /// It is used to determine which model to use in tokenizing.
  String? langCode;

  /// [senderL1] and [senderL2] are the languages of the sender.
  ///
  /// If [langCode] is not known, [senderL1] and [senderL2] will be used to
  /// help determine the language of the text. If [langCode] is known, they
  /// will be used to determine whether the tokens need pos/morph tags and
  /// whether lemmas are eligible to be marked as "save_vocab=true".
  String senderL1;

  /// The second of the sender's languages; used together with [senderL1]
  /// exactly as described there.
  String senderL2;

  TokensRequestModel({
    required this.fullText,
    required this.langCode,
    required this.senderL1,
    required this.senderL2,
  });

  /// Serializes this request into the JSON body of the tokenize call.
  ///
  /// NOTE(review): the sender languages are written under
  /// [ModelKey.userL1] / [ModelKey.userL2]; confirm these are the keys the
  /// server expects for the sender's languages.
  Map<String, dynamic> toJson() => {
        ModelKey.fullText: fullText,
        ModelKey.userL1: senderL1,
        ModelKey.userL2: senderL2,
        ModelKey.langCode: langCode,
      };

  // Override equals and hashCode so that requests with the same content
  // compare equal. [langCode] takes part in both: two requests that differ
  // only in the known language are different requests.
  @override
  bool operator ==(Object other) {
    if (identical(this, other)) return true;

    return other is TokensRequestModel &&
        other.fullText == fullText &&
        other.langCode == langCode &&
        other.senderL1 == senderL1 &&
        other.senderL2 == senderL2;
  }

  // Object.hash is order-sensitive, so identical senderL1/senderL2 values do
  // not cancel each other out the way XOR-combined hash codes would.
  @override
  int get hashCode => Object.hash(fullText, langCode, senderL1, senderL2);
}
|
||||
|
||||
/// Parsed response of the tokenize endpoint.
class TokensResponseModel {
  /// The tokens parsed from the [ModelKey.tokens] entry of the response.
  List<PangeaToken> tokens;

  /// The value of the [ModelKey.lang] entry of the response.
  String lang;

  TokensResponseModel({required this.tokens, required this.lang});

  /// Builds a response from decoded JSON.
  ///
  /// Expects [ModelKey.tokens] to hold an iterable of JSON objects and
  /// [ModelKey.lang] to hold a string; a missing or mistyped entry throws a
  /// [TypeError] from the casts below.
  factory TokensResponseModel.fromJson(
    Map<String, dynamic> json,
  ) =>
      TokensResponseModel(
        // map<PangeaToken>() already yields a typed iterable, so toList()
        // produces a List<PangeaToken> without an extra cast wrapper.
        tokens: (json[ModelKey.tokens] as Iterable)
            .map<PangeaToken>(
              (e) => PangeaToken.fromJson(e as Map<String, dynamic>),
            )
            .toList(),
        lang: json[ModelKey.lang] as String,
      );
}
|
||||
|
|
@ -1,95 +0,0 @@
|
|||
import 'dart:convert';
|
||||
|
||||
import 'package:http/http.dart';
|
||||
|
||||
import 'package:fluffychat/pangea/constants/model_keys.dart';
|
||||
import 'package:fluffychat/pangea/utils/error_handler.dart';
|
||||
import '../config/environment.dart';
|
||||
import '../models/pangea_token_model.dart';
|
||||
import '../network/requests.dart';
|
||||
import '../network/urls.dart';
|
||||
|
||||
class TokensRepo {
|
||||
static Future<TokensResponseModel> tokenize(
|
||||
String accessToken,
|
||||
TokensRequestModel request,
|
||||
) async {
|
||||
final Requests req = Requests(
|
||||
choreoApiKey: Environment.choreoApiKey,
|
||||
accessToken: accessToken,
|
||||
);
|
||||
|
||||
final Response res = await req.post(
|
||||
url: PApiUrls.tokenize,
|
||||
body: request.toJson(),
|
||||
);
|
||||
|
||||
final TokensResponseModel response = TokensResponseModel.fromJson(
|
||||
jsonDecode(
|
||||
utf8.decode(res.bodyBytes).toString(),
|
||||
),
|
||||
);
|
||||
|
||||
if (response.tokens.isEmpty) {
|
||||
ErrorHandler.logError(
|
||||
e: Exception(
|
||||
"empty tokens in tokenize response return",
|
||||
),
|
||||
);
|
||||
}
|
||||
|
||||
return response;
|
||||
}
|
||||
}
|
||||
|
||||
class TokensRequestModel {
|
||||
String fullText;
|
||||
String userL1;
|
||||
String userL2;
|
||||
|
||||
TokensRequestModel({
|
||||
required this.fullText,
|
||||
required this.userL1,
|
||||
required this.userL2,
|
||||
});
|
||||
|
||||
Map<String, dynamic> toJson() => {
|
||||
ModelKey.fullText: fullText,
|
||||
ModelKey.userL1: userL1,
|
||||
ModelKey.userL2: userL2,
|
||||
};
|
||||
|
||||
// override equals and hashcode
|
||||
@override
|
||||
bool operator ==(Object other) {
|
||||
if (identical(this, other)) return true;
|
||||
|
||||
return other is TokensRequestModel &&
|
||||
other.fullText == fullText &&
|
||||
other.userL1 == userL1 &&
|
||||
other.userL2 == userL2;
|
||||
}
|
||||
|
||||
@override
|
||||
int get hashCode => fullText.hashCode ^ userL1.hashCode ^ userL2.hashCode;
|
||||
}
|
||||
|
||||
class TokensResponseModel {
|
||||
List<PangeaToken> tokens;
|
||||
String lang;
|
||||
|
||||
TokensResponseModel({required this.tokens, required this.lang});
|
||||
|
||||
factory TokensResponseModel.fromJson(
|
||||
Map<String, dynamic> json,
|
||||
) =>
|
||||
TokensResponseModel(
|
||||
tokens: (json[ModelKey.tokens] as Iterable)
|
||||
.map<PangeaToken>(
|
||||
(e) => PangeaToken.fromJson(e as Map<String, dynamic>),
|
||||
)
|
||||
.toList()
|
||||
.cast<PangeaToken>(),
|
||||
lang: json[ModelKey.lang],
|
||||
);
|
||||
}
|
||||
Loading…
Add table
Reference in a new issue