From d4073668c33de37bf743d605c21f8d0af9e1e301 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 17 Nov 2025 19:16:00 +0000
Subject: [PATCH] Fix script to exclude placeholder keys from extraction

Co-authored-by: ggurdin <46800240+ggurdin@users.noreply.github.com>
---
 scripts/FIND_UNUSED_INTL_KEYS.md | 10 ++++++---
 scripts/find_unused_intl_keys.py | 38 +++++++++++++++++++++++---------
 2 files changed, 34 insertions(+), 14 deletions(-)

diff --git a/scripts/FIND_UNUSED_INTL_KEYS.md b/scripts/FIND_UNUSED_INTL_KEYS.md
index 9b623e0ac..ecc3176ac 100644
--- a/scripts/FIND_UNUSED_INTL_KEYS.md
+++ b/scripts/FIND_UNUSED_INTL_KEYS.md
@@ -10,9 +10,10 @@ The script was created to clean up the internationalization (i18n) files by find
 
 1. **Extracts Keys**: Reads `lib/l10n/intl_en.arb` and extracts all translation keys after line 3243 (configurable)
 2. **Filters Metadata**: Automatically excludes keys starting with `@` (metadata keys)
-3. **Searches Repository**: Uses `git grep` to efficiently search for each key in the repository
-4. **Filters Results**: Excludes matches found only in `.arb` files (other language files)
-5. **Reports Findings**: Generates a JSON file with the list of unused keys
+3. **Filters Placeholders**: Excludes keys nested inside metadata objects, such as the `placeholders` and `type` fields and placeholder names like `l1` and `l2`
+4. **Searches Repository**: Uses `git grep` to efficiently search for each key in the repository
+5. **Filters Results**: Excludes matches found only in `.arb` files (other language files)
+6. **Reports Findings**: Generates a JSON file with the list of unused keys
 
 ## Usage
 
@@ -75,10 +76,13 @@ Found 488 unused keys (not referenced in any .dart files):
 
 - **Unused keys**: Translation keys that appear only in `.arb` files and nowhere else in the codebase
 - **Metadata keys** (starting with `@`) are automatically excluded from the analysis
+- **Placeholder keys** (nested inside metadata objects like `placeholders`) are automatically excluded
 
 ## Notes
 
 - Keys starting with `@` are metadata and are automatically skipped
+- Nested keys inside metadata objects (like `l1`, `l2` in placeholders) are automatically filtered out
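+- Only top-level translation keys are analyzed; they are recognized by their two-space indent in the `.arb` file, so keys formatted differently are skipped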
 - The script searches only for exact key matches in the repository
 - False positives are possible if keys are constructed dynamically (e.g., using string interpolation)
 - Always review the unused keys list before removing them from the translation files
diff --git a/scripts/find_unused_intl_keys.py b/scripts/find_unused_intl_keys.py
index 15378e11f..61b28fed3 100755
--- a/scripts/find_unused_intl_keys.py
+++ b/scripts/find_unused_intl_keys.py
@@ -28,7 +28,11 @@ def extract_keys_after_line(arb_file_path: str, start_line: int = 3243) -> List[
     Extract translation keys from .arb file after a specific line.
     
     ARB files are JSON files where keys starting with @ are metadata.
-    We only want the actual translation keys (non-@ keys).
+    We only want the actual translation keys (non-@ keys), not placeholder
+    keys or other nested metadata fields.
+    
+    This function extracts only TOP-LEVEL keys that first appear after the
+    specified line number. Keys that appear as placeholders are ignored.
     
     Args:
         arb_file_path: Path to the .arb file
@@ -37,21 +41,33 @@ def extract_keys_after_line(arb_file_path: str, start_line: int = 3243) -> List[
     Returns:
         List of translation key names
     """
-    keys = []
+    # Load the entire JSON to get proper structure
+    with open(arb_file_path, 'r', encoding='utf-8') as f:
+        data = json.load(f)
     
+    # Read file again to get line numbers for each key
     with open(arb_file_path, 'r', encoding='utf-8') as f:
         lines = f.readlines()
     
-    # Start from the specified line (convert to 0-indexed)
-    for line_num, line in enumerate(lines[start_line - 1:], start=start_line):
-        # Look for keys in JSON format: "keyName": "value"
+    keys = []
+    
+    # Extract only top-level keys (not nested keys inside metadata)
+    for key in data.keys():
         # Skip metadata keys (those starting with @)
-        match = re.match(r'\s*"([^"]+)":\s*["{]', line)
-        if match:
-            key = match.group(1)
-            # Explicitly skip keys that start with @
-            if not key.startswith('@'):
-                keys.append(key)
+        if key.startswith('@'):
+            continue
+        
+        # Find the FIRST occurrence of this key as a top-level definition
+        # A top-level key appears at the start of a line (after exactly two spaces)
+        # with the pattern: "keyName": (not nested inside another object)
+        for line_num, line in enumerate(lines, start=1):
+            # Match key at the beginning of a line (indentation level 1)
+            # This ensures we're matching top-level keys, not nested ones
+            if re.match(r'^  "' + re.escape(key) + r'":\s*', line):
+                # Only include keys that appear after the specified line
+                if line_num > start_line:
+                    keys.append(key)
+                break
     
     return keys