From d4073668c33de37bf743d605c21f8d0af9e1e301 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 17 Nov 2025 19:16:00 +0000 Subject: [PATCH] Fix script to exclude placeholder keys from extraction Co-authored-by: ggurdin <46800240+ggurdin@users.noreply.github.com> --- scripts/FIND_UNUSED_INTL_KEYS.md | 10 ++++++--- scripts/find_unused_intl_keys.py | 38 +++++++++++++++++++++++--------- 2 files changed, 34 insertions(+), 14 deletions(-) diff --git a/scripts/FIND_UNUSED_INTL_KEYS.md b/scripts/FIND_UNUSED_INTL_KEYS.md index 9b623e0ac..ecc3176ac 100644 --- a/scripts/FIND_UNUSED_INTL_KEYS.md +++ b/scripts/FIND_UNUSED_INTL_KEYS.md @@ -10,9 +10,10 @@ The script was created to clean up the internationalization (i18n) files by find 1. **Extracts Keys**: Reads `lib/l10n/intl_en.arb` and extracts all translation keys after line 3243 (configurable) 2. **Filters Metadata**: Automatically excludes keys starting with `@` (metadata keys) -3. **Searches Repository**: Uses `git grep` to efficiently search for each key in the repository -4. **Filters Results**: Excludes matches found only in `.arb` files (other language files) -5. **Reports Findings**: Generates a JSON file with the list of unused keys +3. **Filters Placeholders**: Excludes nested placeholder keys inside metadata objects (e.g., `l1`, `l2`, `type`, `placeholders`) +4. **Searches Repository**: Uses `git grep` to efficiently search for each key in the repository +5. **Filters Results**: Excludes matches found only in `.arb` files (other language files) +6. **Reports Findings**: Generates a JSON file with the list of unused keys ## Usage @@ -75,10 +76,13 @@ Found 488 unused keys (not referenced in any .dart files): - **Unused keys**: Translation keys that appear only in `.arb` files and nowhere else in the codebase - **Metadata keys** (starting with `@`) are automatically excluded from the analysis +- **Placeholder keys** (nested inside metadata objects like `placeholders`) are automatically excluded ## Notes - Keys starting with `@` are metadata and are automatically skipped +- Nested keys inside metadata objects (like `l1`, `l2` in placeholders) are automatically filtered out +- Only top-level translation keys are analyzed - The script searches only for exact key matches in the repository - False positives are possible if keys are constructed dynamically (e.g., using string interpolation) - Always review the unused keys list before removing them from the translation files diff --git a/scripts/find_unused_intl_keys.py b/scripts/find_unused_intl_keys.py index 15378e11f..61b28fed3 100755 --- a/scripts/find_unused_intl_keys.py +++ b/scripts/find_unused_intl_keys.py @@ -28,7 +28,11 @@ def extract_keys_after_line(arb_file_path: str, start_line: int = 3243) -> List[ Extract translation keys from .arb file after a specific line. ARB files are JSON files where keys starting with @ are metadata. - We only want the actual translation keys (non-@ keys). + We only want the actual translation keys (non-@ keys), not placeholder + keys or other nested metadata fields. + + This function extracts only TOP-LEVEL keys that first appear after the + specified line number. Keys that appear as placeholders are ignored. Args: arb_file_path: Path to the .arb file @@ -37,21 +41,33 @@ def extract_keys_after_line(arb_file_path: str, start_line: int = 3243) -> List[ Returns: List of translation key names """ - keys = [] + # Load the entire JSON to get proper structure + with open(arb_file_path, 'r', encoding='utf-8') as f: + data = json.load(f) + # Read file again to get line numbers for each key with open(arb_file_path, 'r', encoding='utf-8') as f: lines = f.readlines() - # Start from the specified line (convert to 0-indexed) - for line_num, line in enumerate(lines[start_line - 1:], start=start_line): - # Look for keys in JSON format: "keyName": "value" + keys = [] + + # Extract only top-level keys (not nested keys inside metadata) + for key in data.keys(): # Skip metadata keys (those starting with @) - match = re.match(r'\s*"([^"]+)":\s*["{]', line) - if match: - key = match.group(1) - # Explicitly skip keys that start with @ - if not key.startswith('@'): - keys.append(key) + if key.startswith('@'): + continue + + # Find the FIRST occurrence of this key as a top-level definition + # A top-level key appears at the start of a line (after whitespace) + # with the pattern: "keyName": (not nested inside another object) + for line_num, line in enumerate(lines, start=1): + # Match key at the beginning of a line (indentation level 1) + # This ensures we're matching top-level keys, not nested ones + if re.match(r'^ "' + re.escape(key) + r'":\s*', line): + # Only include keys that appear after the specified line + if line_num > start_line: + keys.append(key) + break return keys