From 33719b3ee7fdfacd24bf114ac183afd573f0a655 Mon Sep 17 00:00:00 2001 From: wcjord <32568597+wcjord@users.noreply.github.com> Date: Thu, 12 Feb 2026 14:52:22 -0500 Subject: [PATCH] Add score field to LLMFeedbackModel for human audit integration (#5684) - Add score: Optional[int] to LLMFeedbackModel with conditional serialization - Add ModelKey.score constant - Update choreographer.instructions.md with feedback architecture docs Closes krille-chan/fluffychat#2560 --- .github/instructions/choreographer.instructions.md | 6 ++++++ lib/pangea/common/constants/model_keys.dart | 1 + lib/pangea/common/models/llm_feedback_model.dart | 7 +++++++ 3 files changed, 14 insertions(+) diff --git a/.github/instructions/choreographer.instructions.md b/.github/instructions/choreographer.instructions.md index 3dcebbf2f..487e1e541 100644 --- a/.github/instructions/choreographer.instructions.md +++ b/.github/instructions/choreographer.instructions.md @@ -68,6 +68,12 @@ When a match is accepted/ignored, the `IgcController` fires `matchUpdateStream`. If the user is unsatisfied with results, `rerunWithFeedback(feedbackText)` re-calls IGC with user feedback and the previous request/response context (`_lastRequest`, `_lastResponse`). +**What the client sends**: The feedback request sends `List<LLMFeedbackModel>` items on the request body. Each item carries `feedback` (optional string), `content` (the previous response for context), and `score` (optional int, 0–10). The score lets native speakers approve content (9–10) or reject it (0–6), while learners typically send a low score with corrective text. We'll probably just do 0 or 10 corresponding to thumbs up/down, but the schema supports finer granularity if needed. + +**What the server does with it**: The router extracts `matrix_user_id` from the auth token and passes it along with the feedback list to `get()`. 
When feedback is present, `get()` builds an `Audit` internally (score + auditor + feedback text) and appends it to the CMS document (fire-and-forget). If the score indicates rejection (< 7), `get()` regenerates with an escalated model. The human judgment (who rejected/approved, why, when) lives on the `res.audit` array. See the server-side inference doc's Feedback Architecture section for the full flow. + +**Native speaker approval**: When a native speaker sends score 9–10, the server persists the audit (upgrading the doc to fine-tuning eligible) and returns the cached response without regeneration. + ### 5. Sending On send, `Choreographer.getMessageContent()`: diff --git a/lib/pangea/common/constants/model_keys.dart b/lib/pangea/common/constants/model_keys.dart index 74d28e6e3..2f8f374c6 100644 --- a/lib/pangea/common/constants/model_keys.dart +++ b/lib/pangea/common/constants/model_keys.dart @@ -109,6 +109,7 @@ class ModelKey { static const String feedbackLang = "feedback_lang"; static const String feedback = "feedback"; static const String content = "content"; + static const String score = "score"; static const String transcription = "transcription"; static const String botTranscription = 'bot_transcription'; diff --git a/lib/pangea/common/models/llm_feedback_model.dart b/lib/pangea/common/models/llm_feedback_model.dart index cd43942b3..9433f7802 100644 --- a/lib/pangea/common/models/llm_feedback_model.dart +++ b/lib/pangea/common/models/llm_feedback_model.dart @@ -12,14 +12,21 @@ class LLMFeedbackModel { /// Function to serialize the content to JSON final Map<String, dynamic> Function(T) contentToJson; + /// Optional 0-10 score for the content being reviewed. + /// Not yet used by any caller — field established for future + /// human audit / contributor score integration. + final int? 
score; + const LLMFeedbackModel({ required this.feedback, required this.content, required this.contentToJson, + this.score, }); Map<String, dynamic> toJson() => { ModelKey.feedback: feedback, ModelKey.content: contentToJson(content), + if (score != null) ModelKey.score: score, }; }