From 33719b3ee7fdfacd24bf114ac183afd573f0a655 Mon Sep 17 00:00:00 2001 From: wcjord <32568597+wcjord@users.noreply.github.com> Date: Thu, 12 Feb 2026 14:52:22 -0500 Subject: [PATCH] Add score field to LLMFeedbackModel for human audit integration (#5684) - Add score: Optional[int] to LLMFeedbackModel with conditional serialization - Add ModelKey.score constant - Update choreographer.instructions.md with feedback architecture docs Closes krille-chan/fluffychat#2560 --- .github/instructions/choreographer.instructions.md | 6 ++++++ lib/pangea/common/constants/model_keys.dart | 1 + lib/pangea/common/models/llm_feedback_model.dart | 7 +++++++ 3 files changed, 14 insertions(+) diff --git a/.github/instructions/choreographer.instructions.md b/.github/instructions/choreographer.instructions.md index 3dcebbf2f..487e1e541 100644 --- a/.github/instructions/choreographer.instructions.md +++ b/.github/instructions/choreographer.instructions.md @@ -68,6 +68,12 @@ When a match is accepted/ignored, the `IgcController` fires `matchUpdateStream`. If the user is unsatisfied with results, `rerunWithFeedback(feedbackText)` re-calls IGC with user feedback and the previous request/response context (`_lastRequest`, `_lastResponse`). +**What the client sends**: The feedback request sends `List<LLMFeedbackModel>` items on the request body. Each item carries `feedback` (optional string), `content` (the previous response for context), and `score` (optional int, 0–10). The score lets native speakers approve content (9–10) or reject it (0–6), while learners typically send a low score with corrective text. We'll probably just do 0 or 10 corresponding to thumbs up/down, but the schema supports finer granularity if needed. + +**What the server does with it**: The router extracts `matrix_user_id` from the auth token and passes it along with the feedback list to `get()`. 
When feedback is present, `get()` builds an `Audit` internally (score + auditor + feedback text) and appends it to the CMS document (fire-and-forget). If the score indicates rejection (< 7), `get()` regenerates with an escalated model. The human judgment (who rejected/approved, why, when) lives on the `res.audit` array. See the server-side inference doc's Feedback Architecture section for the full flow. + +**Native speaker approval**: When a native speaker sends score 9–10, the server persists the audit (upgrading the doc to fine-tuning eligible) and returns the cached response without regeneration. + ### 5. Sending On send, `Choreographer.getMessageContent()`: diff --git a/lib/pangea/common/constants/model_keys.dart b/lib/pangea/common/constants/model_keys.dart index 74d28e6e3..2f8f374c6 100644 --- a/lib/pangea/common/constants/model_keys.dart +++ b/lib/pangea/common/constants/model_keys.dart @@ -109,6 +109,7 @@ class ModelKey { static const String feedbackLang = "feedback_lang"; static const String feedback = "feedback"; static const String content = "content"; + static const String score = "score"; static const String transcription = "transcription"; static const String botTranscription = 'bot_transcription'; diff --git a/lib/pangea/common/models/llm_feedback_model.dart b/lib/pangea/common/models/llm_feedback_model.dart index cd43942b3..9433f7802 100644 --- a/lib/pangea/common/models/llm_feedback_model.dart +++ b/lib/pangea/common/models/llm_feedback_model.dart @@ -12,14 +12,21 @@ class LLMFeedbackModel { /// Function to serialize the content to JSON final Map<String, dynamic> Function(T) contentToJson; + /// Optional 0-10 score for the content being reviewed. + /// Not yet used by any caller — field established for future + /// human audit / contributor score integration. + final int? 
score; + const LLMFeedbackModel({ required this.feedback, required this.content, required this.contentToJson, + this.score, }); Map<String, dynamic> toJson() => { ModelKey.feedback: feedback, ModelKey.content: contentToJson(content), + if (score != null) ModelKey.score: score, }; }