initial work to remove speech to text controller from pangea controller
This commit is contained in:
parent
4dc948d197
commit
a56ba59316
10 changed files with 291 additions and 334 deletions
|
|
@ -21,8 +21,8 @@ import 'package:fluffychat/pangea/learning_settings/controllers/language_control
|
|||
import 'package:fluffychat/pangea/learning_settings/utils/locale_provider.dart';
|
||||
import 'package:fluffychat/pangea/learning_settings/utils/p_language_store.dart';
|
||||
import 'package:fluffychat/pangea/spaces/controllers/space_code_controller.dart';
|
||||
import 'package:fluffychat/pangea/speech_to_text/speech_to_text_controller.dart';
|
||||
import 'package:fluffychat/pangea/subscription/controllers/subscription_controller.dart';
|
||||
import 'package:fluffychat/pangea/toolbar/controllers/speech_to_text_controller.dart';
|
||||
import 'package:fluffychat/pangea/toolbar/controllers/text_to_speech_controller.dart';
|
||||
import 'package:fluffychat/pangea/toolbar/controllers/tts_controller.dart';
|
||||
import 'package:fluffychat/pangea/user/controllers/permissions_controller.dart';
|
||||
|
|
|
|||
|
|
@ -19,9 +19,10 @@ import 'package:fluffychat/pangea/events/repo/language_detection_request.dart';
|
|||
import 'package:fluffychat/pangea/events/repo/language_detection_response.dart';
|
||||
import 'package:fluffychat/pangea/learning_settings/utils/p_language_store.dart';
|
||||
import 'package:fluffychat/pangea/spaces/models/space_model.dart';
|
||||
import 'package:fluffychat/pangea/speech_to_text/audio_encoding_enum.dart';
|
||||
import 'package:fluffychat/pangea/speech_to_text/speech_to_text_request_model.dart';
|
||||
import 'package:fluffychat/pangea/speech_to_text/speech_to_text_response_model.dart';
|
||||
import 'package:fluffychat/pangea/toolbar/controllers/text_to_speech_controller.dart';
|
||||
import 'package:fluffychat/pangea/toolbar/enums/audio_encoding_enum.dart';
|
||||
import 'package:fluffychat/pangea/toolbar/models/speech_to_text_models.dart';
|
||||
import 'package:fluffychat/pangea/toolbar/widgets/message_audio_card.dart';
|
||||
import 'package:fluffychat/pangea/translation/full_text_translation_request_model.dart';
|
||||
import 'package:fluffychat/widgets/future_loading_dialog.dart';
|
||||
|
|
@ -227,13 +228,13 @@ class PangeaMessageEvent {
|
|||
null;
|
||||
}).toSet();
|
||||
|
||||
SpeechToTextModel? getSpeechToTextLocal() {
|
||||
SpeechToTextResponseModel? getSpeechToTextLocal() {
|
||||
final rawBotTranscription =
|
||||
event.content.tryGetMap(ModelKey.botTranscription);
|
||||
|
||||
if (rawBotTranscription != null) {
|
||||
try {
|
||||
return SpeechToTextModel.fromJson(
|
||||
return SpeechToTextResponseModel.fromJson(
|
||||
Map<String, dynamic>.from(rawBotTranscription),
|
||||
);
|
||||
} catch (err, s) {
|
||||
|
|
@ -257,7 +258,7 @@ class PangeaMessageEvent {
|
|||
.speechToText;
|
||||
}
|
||||
|
||||
Future<SpeechToTextModel> getSpeechToText(
|
||||
Future<SpeechToTextResponseModel> getSpeechToText(
|
||||
String l1Code,
|
||||
String l2Code,
|
||||
) async {
|
||||
|
|
@ -268,7 +269,8 @@ class PangeaMessageEvent {
|
|||
final rawBotTranscription =
|
||||
event.content.tryGetMap(ModelKey.botTranscription);
|
||||
if (rawBotTranscription != null) {
|
||||
final SpeechToTextModel botTranscription = SpeechToTextModel.fromJson(
|
||||
final SpeechToTextResponseModel botTranscription =
|
||||
SpeechToTextResponseModel.fromJson(
|
||||
Map<String, dynamic>.from(rawBotTranscription),
|
||||
);
|
||||
|
||||
|
|
@ -290,7 +292,7 @@ class PangeaMessageEvent {
|
|||
return botTranscription;
|
||||
}
|
||||
|
||||
final SpeechToTextModel? speechToTextLocal = representations
|
||||
final SpeechToTextResponseModel? speechToTextLocal = representations
|
||||
.firstWhereOrNull(
|
||||
(element) => element.content.speechToText != null,
|
||||
)
|
||||
|
|
@ -303,7 +305,7 @@ class PangeaMessageEvent {
|
|||
|
||||
final matrixFile = await _event.downloadAndDecryptAttachment();
|
||||
|
||||
final SpeechToTextModel response =
|
||||
final SpeechToTextResponseModel response =
|
||||
await MatrixState.pangeaController.speechToText.get(
|
||||
SpeechToTextRequestModel(
|
||||
audioContent: matrixFile.bytes,
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ import 'package:fluffychat/pangea/choreographer/choreo_record_model.dart';
|
|||
import 'package:fluffychat/pangea/choreographer/igc/pangea_match_status_enum.dart';
|
||||
import 'package:fluffychat/pangea/common/utils/error_handler.dart';
|
||||
import 'package:fluffychat/pangea/events/models/pangea_token_model.dart';
|
||||
import 'package:fluffychat/pangea/toolbar/models/speech_to_text_models.dart';
|
||||
import 'package:fluffychat/pangea/speech_to_text/speech_to_text_response_model.dart';
|
||||
import 'package:fluffychat/widgets/matrix.dart';
|
||||
|
||||
/// this class is contained within a [RepresentationEvent]
|
||||
|
|
@ -30,7 +30,7 @@ class PangeaRepresentation {
|
|||
bool originalWritten;
|
||||
|
||||
// a representation can be create via speech to text on the original message
|
||||
SpeechToTextModel? speechToText;
|
||||
SpeechToTextResponseModel? speechToText;
|
||||
|
||||
// how do we know which representation was sent by author?
|
||||
// RepresentationEvent.text == PangeaMessageEvent.event.body
|
||||
|
|
@ -70,7 +70,7 @@ class PangeaRepresentation {
|
|||
originalWritten: json[_originalWrittenKey] ?? false,
|
||||
speechToText: json[_speechToTextKey] == null
|
||||
? null
|
||||
: SpeechToTextModel.fromJson(json[_speechToTextKey]),
|
||||
: SpeechToTextResponseModel.fromJson(json[_speechToTextKey]),
|
||||
);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -26,11 +26,8 @@ enum AudioEncodingEnum {
|
|||
speexWithHeaderByte,
|
||||
mp3,
|
||||
mp4,
|
||||
webmOpus,
|
||||
}
|
||||
webmOpus;
|
||||
|
||||
// Utility extension to map enum values to their corresponding string value as used by the API
|
||||
extension AudioEncodingExtension on AudioEncodingEnum {
|
||||
String get value {
|
||||
switch (this) {
|
||||
case AudioEncodingEnum.linear16:
|
||||
105
lib/pangea/speech_to_text/speech_to_text_repo.dart
Normal file
105
lib/pangea/speech_to_text/speech_to_text_repo.dart
Normal file
|
|
@ -0,0 +1,105 @@
|
|||
import 'dart:convert';
|
||||
|
||||
import 'package:async/async.dart';
|
||||
import 'package:http/http.dart';
|
||||
|
||||
import 'package:fluffychat/pangea/common/config/environment.dart';
|
||||
import 'package:fluffychat/pangea/common/network/requests.dart';
|
||||
import 'package:fluffychat/pangea/common/network/urls.dart';
|
||||
import 'package:fluffychat/pangea/common/utils/error_handler.dart';
|
||||
import 'package:fluffychat/pangea/speech_to_text/speech_to_text_request_model.dart';
|
||||
import 'package:fluffychat/pangea/speech_to_text/speech_to_text_response_model.dart';
|
||||
|
||||
/// A single cache entry: a transcription future plus the time it was stored.
class _SpeechToTextCacheItem {
  /// Moment at which this entry was created; compared against the cache
  /// lifetime when stale entries are swept.
  final DateTime timestamp;

  /// The transcription future held by this entry.
  final Future<SpeechToTextResponseModel> data;

  const _SpeechToTextCacheItem({
    required this.data,
    required this.timestamp,
  });
}
|
||||
|
||||
/// Fetches speech-to-text transcriptions and caches them in memory.
///
/// Equal requests issued within [_cacheDuration] share one underlying network
/// call. A call that fails is evicted from the cache so that the next caller
/// triggers a fresh attempt.
class SpeechToTextRepo {
  /// Cache keyed by the request itself rather than by its hash code, so two
  /// different requests whose hash codes collide can never be served each
  /// other's transcript. Entries keep the request alive until they expire.
  static final Map<SpeechToTextRequestModel, _SpeechToTextCacheItem> _cache =
      {};

  /// How long an entry may be served before it is dropped.
  static const Duration _cacheDuration = Duration(minutes: 10);

  /// Returns the transcription for [request], served from the cache when an
  /// equal request was made less than [_cacheDuration] ago.
  ///
  /// Never throws: failures are logged and returned as an error [Result].
  static Future<Result<SpeechToTextResponseModel>> get(
    String accessToken,
    SpeechToTextRequestModel request,
  ) {
    final cached = _getCached(request);
    if (cached != null) {
      return _getResult(request, cached);
    }

    final future = _fetch(accessToken, request);
    _setCached(request, future);
    return _getResult(request, future);
  }

  /// Posts [request] to the speech-to-text endpoint and parses the response.
  ///
  /// Throws an [Exception] when the server does not answer with status 200.
  static Future<SpeechToTextResponseModel> _fetch(
    String accessToken,
    SpeechToTextRequestModel request,
  ) async {
    final Requests req = Requests(
      choreoApiKey: Environment.choreoApiKey,
      accessToken: accessToken,
    );

    final Response res = await req.post(
      url: PApiUrls.speechToText,
      body: request.toJson(),
    );

    if (res.statusCode != 200) {
      throw Exception(
        'Failed to convert speech to text: '
        '${res.statusCode} ${res.reasonPhrase}',
      );
    }

    return SpeechToTextResponseModel.fromJson(
      jsonDecode(utf8.decode(res.bodyBytes)),
    );
  }

  /// Awaits [future] and wraps its outcome in a [Result].
  ///
  /// On failure the matching cache entry is removed and the error is logged.
  static Future<Result<SpeechToTextResponseModel>> _getResult(
    SpeechToTextRequestModel request,
    Future<SpeechToTextResponseModel> future,
  ) async {
    try {
      final res = await future;
      return Result.value(res);
    } catch (e, s) {
      // Only evict the entry that produced this failure; a newer attempt for
      // an equal request may already have replaced it.
      if (identical(_cache[request]?.data, future)) {
        _cache.remove(request);
      }
      // NOTE(review): toJson() includes the base64-encoded audio, so the
      // whole recording is attached to the log entry - confirm that is wanted.
      ErrorHandler.logError(
        e: e,
        s: s,
        data: request.toJson(),
      );
      return Result.error(e, s);
    }
  }

  /// Drops every expired entry, then returns the future cached for [request],
  /// or `null` when there is none.
  static Future<SpeechToTextResponseModel>? _getCached(
    SpeechToTextRequestModel request,
  ) {
    final now = DateTime.now();
    _cache.removeWhere(
      (_, item) => now.difference(item.timestamp) >= _cacheDuration,
    );

    return _cache[request]?.data;
  }

  /// Stores [response] for [request], stamped with the current time.
  static void _setCached(
    SpeechToTextRequestModel request,
    Future<SpeechToTextResponseModel> response,
  ) =>
      _cache[request] = _SpeechToTextCacheItem(
        data: response,
        timestamp: DateTime.now(),
      );
}
|
||||
70
lib/pangea/speech_to_text/speech_to_text_request_model.dart
Normal file
70
lib/pangea/speech_to_text/speech_to_text_request_model.dart
Normal file
|
|
@ -0,0 +1,70 @@
|
|||
import 'dart:convert';
|
||||
|
||||
import 'package:flutter/foundation.dart';
|
||||
|
||||
import 'package:matrix/matrix.dart';
|
||||
|
||||
import 'package:fluffychat/pangea/speech_to_text/audio_encoding_enum.dart';
|
||||
|
||||
/// Request payload for a speech-to-text transcription.
///
/// Two requests are equal when their audio bytes and [config] are equal, which
/// lets callers de-duplicate or cache identical requests.
class SpeechToTextRequestModel {
  /// Raw audio bytes; sent base64-encoded by [toJson].
  final Uint8List audioContent;

  /// Recognition settings sent alongside the audio.
  final SpeechToTextAudioConfigModel config;

  /// Optional event associated with the audio. It is not serialized by
  /// [toJson] and takes no part in `==` or [hashCode].
  final Event? audioEvent;

  SpeechToTextRequestModel({
    required this.audioContent,
    required this.config,
    this.audioEvent,
  });

  /// Hash over every audio byte plus [config], computed once on first use.
  ///
  /// Hashing only a short prefix of the audio is not enough: recordings in
  /// the same container format tend to share their leading header bytes, so
  /// different recordings would receive the same hash.
  /// Assumes [audioContent] is not mutated after construction.
  late final int _hashCode = Object.hash(
    Object.hashAll(audioContent),
    config,
  );

  /// Serializes the request body: base64 audio plus the config map.
  Map<String, dynamic> toJson() => {
        "audio_content": base64Encode(audioContent),
        "config": config.toJson(),
      };

  @override
  bool operator ==(Object other) {
    if (identical(this, other)) return true;
    if (other is! SpeechToTextRequestModel) return false;

    return listEquals(audioContent, other.audioContent) &&
        config == other.config;
  }

  @override
  int get hashCode => _hashCode;
}

/// Recognition settings for a speech-to-text request.
///
/// Compares by value so that two requests built with separately constructed
/// but identical configs are considered equal.
class SpeechToTextAudioConfigModel {
  final AudioEncodingEnum encoding;
  final int sampleRateHertz;
  final bool enableWordConfidence;
  final bool enableAutomaticPunctuation;
  final String userL1;
  final String userL2;

  SpeechToTextAudioConfigModel({
    required this.encoding,
    required this.userL1,
    required this.userL2,
    this.sampleRateHertz = 16000,
    this.enableWordConfidence = true,
    this.enableAutomaticPunctuation = true,
  });

  /// Serializes the config using the snake_case keys of the request body.
  Map<String, dynamic> toJson() => {
        "encoding": encoding.value,
        "sample_rate_hertz": sampleRateHertz,
        "user_l1": userL1,
        "user_l2": userL2,
        "enable_word_confidence": enableWordConfidence,
        "enable_automatic_punctuation": enableAutomaticPunctuation,
      };

  @override
  bool operator ==(Object other) {
    if (identical(this, other)) return true;
    return other is SpeechToTextAudioConfigModel &&
        encoding == other.encoding &&
        sampleRateHertz == other.sampleRateHertz &&
        enableWordConfidence == other.enableWordConfidence &&
        enableAutomaticPunctuation == other.enableAutomaticPunctuation &&
        userL1 == other.userL1 &&
        userL2 == other.userL2;
  }

  @override
  int get hashCode => Object.hash(
        encoding,
        sampleRateHertz,
        enableWordConfidence,
        enableAutomaticPunctuation,
        userL1,
        userL2,
      );
}
|
||||
|
|
@ -1,79 +1,118 @@
|
|||
import 'dart:convert';
|
||||
|
||||
import 'package:flutter/foundation.dart';
|
||||
import 'package:flutter/material.dart';
|
||||
|
||||
import 'package:matrix/matrix.dart';
|
||||
|
||||
import 'package:fluffychat/config/app_config.dart';
|
||||
import 'package:fluffychat/pangea/analytics_misc/construct_use_type_enum.dart';
|
||||
import 'package:fluffychat/pangea/analytics_misc/constructs_model.dart';
|
||||
import 'package:fluffychat/pangea/events/models/pangea_token_model.dart';
|
||||
import 'package:fluffychat/pangea/toolbar/enums/audio_encoding_enum.dart';
|
||||
|
||||
const int thresholdForGreen = 80;
|
||||
class SpeechToTextResponseModel {
|
||||
final List<SpeechToTextResult> results;
|
||||
|
||||
class SpeechToTextAudioConfigModel {
|
||||
final AudioEncodingEnum encoding;
|
||||
final int sampleRateHertz;
|
||||
final bool enableWordConfidence;
|
||||
final bool enableAutomaticPunctuation;
|
||||
final String userL1;
|
||||
final String userL2;
|
||||
|
||||
SpeechToTextAudioConfigModel({
|
||||
required this.encoding,
|
||||
required this.userL1,
|
||||
required this.userL2,
|
||||
this.sampleRateHertz = 16000,
|
||||
this.enableWordConfidence = true,
|
||||
this.enableAutomaticPunctuation = true,
|
||||
SpeechToTextResponseModel({
|
||||
required this.results,
|
||||
});
|
||||
|
||||
Transcript get transcript => results.first.transcripts.first;
|
||||
|
||||
String get langCode => results.first.transcripts.first.langCode;
|
||||
|
||||
/// Builds a response from the decoded JSON payload.
///
/// Throws an [Exception] when `results` is empty, because the [transcript]
/// and [langCode] getters read the first result unconditionally.
factory SpeechToTextResponseModel.fromJson(Map<String, dynamic> json) {
  final rawResults = json['results'] as List;
  if (rawResults.isEmpty) {
    throw Exception('SpeechToTextResponseModel.fromJson: results is empty');
  }
  return SpeechToTextResponseModel(
    results: rawResults.map((e) => SpeechToTextResult.fromJson(e)).toList(),
  );
}
|
||||
|
||||
Map<String, dynamic> toJson() => {
|
||||
"encoding": encoding.value,
|
||||
"sample_rate_hertz": sampleRateHertz,
|
||||
"user_l1": userL1,
|
||||
"user_l2": userL2,
|
||||
"enable_word_confidence": enableWordConfidence,
|
||||
"enable_automatic_punctuation": enableAutomaticPunctuation,
|
||||
"results": results.map((e) => e.toJson()).toList(),
|
||||
};
|
||||
|
||||
/// Collects the construct uses produced by every token of [transcript] whose
/// lemma is flagged `saveVocab`, all sharing one metadata record stamped with
/// the current time.
List<OneConstructUse> constructs(
  String roomId,
  String eventId,
) {
  final metadata = ConstructUseMetaData(
    roomId: roomId,
    eventId: eventId,
    timeStamp: DateTime.now(),
  );

  return transcript.sttTokens
      .map((sttToken) => sttToken.token)
      // Tokens whose lemma should not be saved contribute nothing.
      .where((token) => token.lemma.saveVocab)
      .expand(
        (token) => token.allUses(
          ConstructUseTypeEnum.pvm,
          metadata,
          ConstructUseTypeEnum.pvm.pointValue,
        ),
      )
      .toList();
}
|
||||
}
|
||||
|
||||
/// One entry of a speech-to-text response's `results` list, holding the
/// transcripts parsed from its `transcripts` key.
class SpeechToTextResult {
  final List<Transcript> transcripts;

  SpeechToTextResult({required this.transcripts});

  /// Parses the `transcripts` list of [json] into [Transcript] objects.
  factory SpeechToTextResult.fromJson(Map<String, dynamic> json) {
    final rawTranscripts = json['transcripts'] as List;
    return SpeechToTextResult(
      transcripts: [
        for (final raw in rawTranscripts) Transcript.fromJson(raw),
      ],
    );
  }

  /// Serializes back to the shape accepted by [SpeechToTextResult.fromJson].
  Map<String, dynamic> toJson() {
    return {
      "transcripts": [
        for (final transcript in transcripts) transcript.toJson(),
      ],
    };
  }
}
|
||||
|
||||
class SpeechToTextRequestModel {
|
||||
final Uint8List audioContent;
|
||||
final SpeechToTextAudioConfigModel config;
|
||||
final Event? audioEvent;
|
||||
class Transcript {
|
||||
final String text;
|
||||
final int confidence;
|
||||
final List<STTToken> sttTokens;
|
||||
final String langCode;
|
||||
final int? wordsPerHr;
|
||||
|
||||
SpeechToTextRequestModel({
|
||||
required this.audioContent,
|
||||
required this.config,
|
||||
this.audioEvent,
|
||||
Transcript({
|
||||
required this.text,
|
||||
required this.confidence,
|
||||
required this.sttTokens,
|
||||
required this.langCode,
|
||||
required this.wordsPerHr,
|
||||
});
|
||||
|
||||
/// Returns the number of words per minute rounded to one decimal place.
|
||||
double? get wordsPerMinute => wordsPerHr != null ? wordsPerHr! / 60 : null;
|
||||
|
||||
/// Parses a transcript from its JSON map.
///
/// `confidence` values above 100 are divided by 100. The result is rounded
/// to an `int`, because `/` always yields a `double` in Dart and assigning
/// that to the `int` field would throw at runtime; double-valued confidences
/// at or below 100 are rounded for the same reason.
factory Transcript.fromJson(Map<String, dynamic> json) {
  final num rawConfidence = json['confidence'];
  final num scaledConfidence =
      rawConfidence <= 100 ? rawConfidence : rawConfidence / 100;

  return Transcript(
    text: json['transcript'],
    confidence: scaledConfidence.round(),
    sttTokens: (json['stt_tokens'] as List)
        .map((e) => STTToken.fromJson(e))
        .toList(),
    langCode: json['lang_code'],
    wordsPerHr: json['words_per_hr'],
  );
}
|
||||
|
||||
Map<String, dynamic> toJson() => {
|
||||
"audio_content": base64Encode(audioContent),
|
||||
"config": config.toJson(),
|
||||
"transcript": text,
|
||||
"confidence": confidence,
|
||||
"stt_tokens": sttTokens.map((e) => e.toJson()).toList(),
|
||||
"lang_code": langCode,
|
||||
"words_per_hr": wordsPerHr,
|
||||
};
|
||||
|
||||
@override
|
||||
bool operator ==(Object other) {
|
||||
if (identical(this, other)) return true;
|
||||
if (other is! SpeechToTextRequestModel) return false;
|
||||
|
||||
return listEquals(audioContent, other.audioContent) &&
|
||||
config == other.config;
|
||||
}
|
||||
|
||||
@override
|
||||
int get hashCode {
|
||||
final bytesSample =
|
||||
audioContent.length > 10 ? audioContent.sublist(0, 10) : audioContent;
|
||||
return Object.hashAll([
|
||||
Object.hashAll(bytesSample),
|
||||
config.hashCode,
|
||||
]);
|
||||
}
|
||||
Color get color => confidence > 80 ? AppConfig.success : AppConfig.warning;
|
||||
}
|
||||
|
||||
class STTToken {
|
||||
|
|
@ -94,15 +133,7 @@ class STTToken {
|
|||
int get length => token.text.length;
|
||||
|
||||
Color color(BuildContext context) {
|
||||
// turning off the color coding for now
|
||||
// whisper doesn't include word-level confidence
|
||||
// if (confidence == null) {
|
||||
return Theme.of(context).colorScheme.onSurface;
|
||||
// }
|
||||
// if (confidence! > thresholdForGreen) {
|
||||
// return AppConfig.success;
|
||||
// }
|
||||
// return AppConfig.warning;
|
||||
}
|
||||
|
||||
factory STTToken.fromJson(Map<String, dynamic> json) {
|
||||
|
|
@ -147,118 +178,3 @@ class STTToken {
|
|||
]);
|
||||
}
|
||||
}
|
||||
|
||||
class Transcript {
|
||||
final String text;
|
||||
final int confidence;
|
||||
final List<STTToken> sttTokens;
|
||||
final String langCode;
|
||||
final int? wordsPerHr;
|
||||
|
||||
Transcript({
|
||||
required this.text,
|
||||
required this.confidence,
|
||||
required this.sttTokens,
|
||||
required this.langCode,
|
||||
required this.wordsPerHr,
|
||||
});
|
||||
|
||||
/// Returns the number of words per minute rounded to one decimal place.
|
||||
double? get wordsPerMinute => wordsPerHr != null ? wordsPerHr! / 60 : null;
|
||||
|
||||
factory Transcript.fromJson(Map<String, dynamic> json) => Transcript(
|
||||
text: json['transcript'],
|
||||
confidence: json['confidence'] <= 100
|
||||
? json['confidence']
|
||||
: json['confidence'] / 100,
|
||||
sttTokens: (json['stt_tokens'] as List)
|
||||
.map((e) => STTToken.fromJson(e))
|
||||
.toList(),
|
||||
langCode: json['lang_code'],
|
||||
wordsPerHr: json['words_per_hr'],
|
||||
);
|
||||
|
||||
Map<String, dynamic> toJson() => {
|
||||
"transcript": text,
|
||||
"confidence": confidence,
|
||||
"stt_tokens": sttTokens.map((e) => e.toJson()).toList(),
|
||||
"lang_code": langCode,
|
||||
"words_per_hr": wordsPerHr,
|
||||
};
|
||||
|
||||
Color color(BuildContext context) {
|
||||
if (confidence > thresholdForGreen) {
|
||||
return AppConfig.success;
|
||||
}
|
||||
return AppConfig.warning;
|
||||
}
|
||||
}
|
||||
|
||||
class SpeechToTextResult {
|
||||
final List<Transcript> transcripts;
|
||||
|
||||
SpeechToTextResult({required this.transcripts});
|
||||
|
||||
factory SpeechToTextResult.fromJson(Map<String, dynamic> json) =>
|
||||
SpeechToTextResult(
|
||||
transcripts: (json['transcripts'] as List)
|
||||
.map((e) => Transcript.fromJson(e))
|
||||
.toList(),
|
||||
);
|
||||
|
||||
Map<String, dynamic> toJson() => {
|
||||
"transcripts": transcripts.map((e) => e.toJson()).toList(),
|
||||
};
|
||||
}
|
||||
|
||||
class SpeechToTextModel {
|
||||
final List<SpeechToTextResult> results;
|
||||
|
||||
SpeechToTextModel({
|
||||
required this.results,
|
||||
});
|
||||
|
||||
Transcript get transcript => results.first.transcripts.first;
|
||||
|
||||
String get langCode => results.first.transcripts.first.langCode;
|
||||
|
||||
factory SpeechToTextModel.fromJson(Map<String, dynamic> json) {
|
||||
final results = json['results'] as List;
|
||||
if (results.isEmpty) {
|
||||
throw Exception('SpeechToTextModel.fromJson: results is empty');
|
||||
}
|
||||
return SpeechToTextModel(
|
||||
results: (json['results'] as List)
|
||||
.map((e) => SpeechToTextResult.fromJson(e))
|
||||
.toList(),
|
||||
);
|
||||
}
|
||||
|
||||
Map<String, dynamic> toJson() => {
|
||||
"results": results.map((e) => e.toJson()).toList(),
|
||||
};
|
||||
|
||||
List<OneConstructUse> constructs(
|
||||
String roomId,
|
||||
String eventId,
|
||||
) {
|
||||
final List<OneConstructUse> constructs = [];
|
||||
final metadata = ConstructUseMetaData(
|
||||
roomId: roomId,
|
||||
eventId: eventId,
|
||||
timeStamp: DateTime.now(),
|
||||
);
|
||||
for (final sstToken in transcript.sttTokens) {
|
||||
final token = sstToken.token;
|
||||
if (!token.lemma.saveVocab) continue;
|
||||
constructs.addAll(
|
||||
token.allUses(
|
||||
ConstructUseTypeEnum.pvm,
|
||||
metadata,
|
||||
ConstructUseTypeEnum.pvm.pointValue,
|
||||
),
|
||||
);
|
||||
}
|
||||
return constructs;
|
||||
}
|
||||
}
|
||||
|
|
@ -1,133 +0,0 @@
|
|||
import 'dart:async';
|
||||
import 'dart:convert';
|
||||
|
||||
import 'package:flutter/foundation.dart';
|
||||
|
||||
import 'package:http/http.dart';
|
||||
|
||||
import 'package:fluffychat/pangea/common/controllers/pangea_controller.dart';
|
||||
import 'package:fluffychat/pangea/common/utils/error_handler.dart';
|
||||
import 'package:fluffychat/pangea/events/constants/pangea_event_types.dart';
|
||||
import 'package:fluffychat/pangea/events/models/representation_content_model.dart';
|
||||
import 'package:fluffychat/pangea/extensions/pangea_room_extension.dart';
|
||||
import 'package:fluffychat/pangea/toolbar/models/speech_to_text_models.dart';
|
||||
import '../../common/config/environment.dart';
|
||||
import '../../common/network/requests.dart';
|
||||
import '../../common/network/urls.dart';
|
||||
|
||||
// Assuming SpeechToTextRequestModel, SpeechToTextModel and related models are already defined as in your provided code.
|
||||
|
||||
class _SpeechToTextCacheItem {
|
||||
Future<SpeechToTextModel> data;
|
||||
|
||||
_SpeechToTextCacheItem({required this.data});
|
||||
}
|
||||
|
||||
class SpeechToTextController {
|
||||
static final Map<int, _SpeechToTextCacheItem> _cache = {};
|
||||
late final PangeaController _pangeaController;
|
||||
Timer? _cacheClearTimer;
|
||||
|
||||
SpeechToTextController(this._pangeaController) {
|
||||
_initializeCacheClearing();
|
||||
}
|
||||
|
||||
void _initializeCacheClearing() {
|
||||
const duration = Duration(minutes: 2);
|
||||
_cacheClearTimer = Timer.periodic(duration, (Timer t) => _clearCache());
|
||||
}
|
||||
|
||||
void _clearCache() {
|
||||
_cache.clear();
|
||||
}
|
||||
|
||||
void dispose() {
|
||||
_cacheClearTimer?.cancel();
|
||||
}
|
||||
|
||||
Future<SpeechToTextModel> get(
|
||||
SpeechToTextRequestModel requestModel,
|
||||
) async {
|
||||
final int cacheKey = requestModel.hashCode;
|
||||
|
||||
if (_cache.containsKey(cacheKey)) {
|
||||
return _cache[cacheKey]!.data;
|
||||
} else {
|
||||
final Future<SpeechToTextModel> response = _fetchResponse(
|
||||
accessToken: _pangeaController.userController.accessToken,
|
||||
requestModel: requestModel,
|
||||
);
|
||||
_cache[cacheKey] = _SpeechToTextCacheItem(data: response);
|
||||
|
||||
return response;
|
||||
}
|
||||
}
|
||||
|
||||
Future<void> saveSpeechToTextAsRepresentationEvent(
|
||||
SpeechToTextModel response,
|
||||
SpeechToTextRequestModel requestModel,
|
||||
) {
|
||||
if (requestModel.audioEvent == null) {
|
||||
debugPrint(
|
||||
'Audio event is null, case of giving speech to text before message sent, currently not implemented',
|
||||
);
|
||||
return Future.value(null);
|
||||
}
|
||||
debugPrint('Saving transcript as matrix event');
|
||||
|
||||
requestModel.audioEvent?.room
|
||||
.sendPangeaEvent(
|
||||
content: PangeaRepresentation(
|
||||
langCode: response.langCode,
|
||||
text: response.transcript.text,
|
||||
originalSent: false,
|
||||
originalWritten: false,
|
||||
speechToText: response,
|
||||
).toJson(),
|
||||
parentEventId: requestModel.audioEvent!.eventId,
|
||||
type: PangeaEventTypes.representation,
|
||||
)
|
||||
.then(
|
||||
(_) => debugPrint('Transcript saved as matrix event'),
|
||||
);
|
||||
|
||||
return Future.value(null);
|
||||
}
|
||||
|
||||
Future<SpeechToTextModel> _fetchResponse({
|
||||
required String accessToken,
|
||||
required SpeechToTextRequestModel requestModel,
|
||||
}) async {
|
||||
final Requests request = Requests(
|
||||
choreoApiKey: Environment.choreoApiKey,
|
||||
accessToken: accessToken,
|
||||
);
|
||||
|
||||
final Response res = await request.post(
|
||||
url: PApiUrls.speechToText,
|
||||
body: requestModel.toJson(),
|
||||
);
|
||||
|
||||
if (res.statusCode == 200) {
|
||||
final Map<String, dynamic> json = jsonDecode(utf8.decode(res.bodyBytes));
|
||||
|
||||
final response = SpeechToTextModel.fromJson(json);
|
||||
|
||||
saveSpeechToTextAsRepresentationEvent(response, requestModel).onError(
|
||||
(error, stackTrace) => ErrorHandler.logError(
|
||||
e: error,
|
||||
s: stackTrace,
|
||||
data: {
|
||||
"response": response.toJson(),
|
||||
"requestModel": requestModel.toJson(),
|
||||
},
|
||||
),
|
||||
);
|
||||
|
||||
return response;
|
||||
} else {
|
||||
debugPrint('Error converting speech to text: ${res.body}');
|
||||
throw Exception('Failed to convert speech to text');
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -9,17 +9,17 @@ import 'package:path_provider/path_provider.dart';
|
|||
import 'package:fluffychat/pangea/common/utils/async_state.dart';
|
||||
import 'package:fluffychat/pangea/events/event_wrappers/pangea_message_event.dart';
|
||||
import 'package:fluffychat/pangea/events/extensions/pangea_event_extension.dart';
|
||||
import 'package:fluffychat/pangea/toolbar/models/speech_to_text_models.dart';
|
||||
import 'package:fluffychat/pangea/speech_to_text/speech_to_text_response_model.dart';
|
||||
import 'package:fluffychat/pangea/toolbar/widgets/message_audio_card.dart';
|
||||
import 'package:fluffychat/pangea/toolbar/widgets/select_mode_buttons.dart';
|
||||
import 'package:fluffychat/widgets/matrix.dart';
|
||||
|
||||
class _TranscriptionLoader extends AsyncLoader<SpeechToTextModel> {
|
||||
class _TranscriptionLoader extends AsyncLoader<SpeechToTextResponseModel> {
|
||||
final PangeaMessageEvent messageEvent;
|
||||
_TranscriptionLoader(this.messageEvent) : super();
|
||||
|
||||
@override
|
||||
Future<SpeechToTextModel> fetch() => messageEvent.getSpeechToText(
|
||||
Future<SpeechToTextResponseModel> fetch() => messageEvent.getSpeechToText(
|
||||
MatrixState.pangeaController.languageController.userL1!.langCodeShort,
|
||||
MatrixState.pangeaController.languageController.userL2!.langCodeShort,
|
||||
);
|
||||
|
|
@ -127,7 +127,7 @@ class SelectModeController {
|
|||
ValueNotifier<AsyncState<String>> get translationState =>
|
||||
_translationLoader.state;
|
||||
|
||||
ValueNotifier<AsyncState<SpeechToTextModel>> get transcriptionState =>
|
||||
ValueNotifier<AsyncState<SpeechToTextResponseModel>> get transcriptionState =>
|
||||
_transcriptLoader.state;
|
||||
|
||||
ValueNotifier<AsyncState<String>> get speechTranslationState =>
|
||||
|
|
|
|||
|
|
@ -2,10 +2,10 @@ import 'package:flutter/material.dart';
|
|||
|
||||
import 'package:fluffychat/pangea/events/models/pangea_token_model.dart';
|
||||
import 'package:fluffychat/pangea/message_token_text/tokens_util.dart';
|
||||
import 'package:fluffychat/pangea/toolbar/models/speech_to_text_models.dart';
|
||||
import 'package:fluffychat/pangea/speech_to_text/speech_to_text_response_model.dart';
|
||||
|
||||
class SttTranscriptTokens extends StatelessWidget {
|
||||
final SpeechToTextModel model;
|
||||
final SpeechToTextResponseModel model;
|
||||
final TextStyle? style;
|
||||
|
||||
final void Function(PangeaToken)? onClick;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue