Fix script to exclude placeholder keys from extraction
Co-authored-by: ggurdin <46800240+ggurdin@users.noreply.github.com>
This commit is contained in:
parent
a8c1d0130d
commit
d4073668c3
2 changed files with 34 additions and 14 deletions
|
|
@ -10,9 +10,10 @@ The script was created to clean up the internationalization (i18n) files by find
|
|||
|
||||
1. **Extracts Keys**: Reads `lib/l10n/intl_en.arb` and extracts all translation keys after line 3243 (configurable)
|
||||
2. **Filters Metadata**: Automatically excludes keys starting with `@` (metadata keys)
|
||||
3. **Searches Repository**: Uses `git grep` to efficiently search for each key in the repository
|
||||
4. **Filters Results**: Excludes matches found only in `.arb` files (other language files)
|
||||
5. **Reports Findings**: Generates a JSON file with the list of unused keys
|
||||
3. **Filters Placeholders**: Excludes nested placeholder keys inside metadata objects (e.g., `l1`, `l2`, `type`, `placeholders`)
|
||||
4. **Searches Repository**: Uses `git grep` to efficiently search for each key in the repository
|
||||
5. **Filters Results**: Excludes matches found only in `.arb` files (other language files)
|
||||
6. **Reports Findings**: Generates a JSON file with the list of unused keys
|
||||
|
||||
## Usage
|
||||
|
||||
|
|
@ -75,10 +76,13 @@ Found 488 unused keys (not referenced in any .dart files):
|
|||
|
||||
- **Unused keys**: Translation keys that appear only in `.arb` files and nowhere else in the codebase
|
||||
- **Metadata keys** (starting with `@`) are automatically excluded from the analysis
|
||||
- **Placeholder keys** (nested inside metadata objects like `placeholders`) are automatically excluded
|
||||
|
||||
## Notes
|
||||
|
||||
- Keys starting with `@` are metadata and are automatically skipped
|
||||
- Nested keys inside metadata objects (like `l1`, `l2` in placeholders) are automatically filtered out
|
||||
- Only top-level translation keys are analyzed
|
||||
- The script searches only for exact key matches in the repository
|
||||
- False positives are possible if keys are constructed dynamically (e.g., using string interpolation)
|
||||
- Always review the unused keys list before removing them from the translation files
|
||||
|
|
|
|||
|
|
@ -28,7 +28,11 @@ def extract_keys_after_line(arb_file_path: str, start_line: int = 3243) -> List[
|
|||
Extract translation keys from .arb file after a specific line.
|
||||
|
||||
ARB files are JSON files where keys starting with @ are metadata.
|
||||
We only want the actual translation keys (non-@ keys).
|
||||
We only want the actual translation keys (non-@ keys), not placeholder
|
||||
keys or other nested metadata fields.
|
||||
|
||||
This function extracts only TOP-LEVEL keys that first appear after the
|
||||
specified line number. Keys that appear as placeholders are ignored.
|
||||
|
||||
Args:
|
||||
arb_file_path: Path to the .arb file
|
||||
|
|
@ -37,21 +41,33 @@ def extract_keys_after_line(arb_file_path: str, start_line: int = 3243) -> List[
|
|||
Returns:
|
||||
List of translation key names
|
||||
"""
|
||||
keys = []
|
||||
# Load the entire JSON to get proper structure
|
||||
with open(arb_file_path, 'r', encoding='utf-8') as f:
|
||||
data = json.load(f)
|
||||
|
||||
# Read file again to get line numbers for each key
|
||||
with open(arb_file_path, 'r', encoding='utf-8') as f:
|
||||
lines = f.readlines()
|
||||
|
||||
# Start from the specified line (convert to 0-indexed)
|
||||
for line_num, line in enumerate(lines[start_line - 1:], start=start_line):
|
||||
# Look for keys in JSON format: "keyName": "value" or "keyName": {
|
||||
keys = []
|
||||
|
||||
# Extract only top-level keys (not nested keys inside metadata)
|
||||
for key in data.keys():
|
||||
# Skip metadata keys (those starting with @)
|
||||
match = re.match(r'\s*"([^"]+)":\s*["{]', line)
|
||||
if match:
|
||||
key = match.group(1)
|
||||
# Explicitly skip keys that start with @
|
||||
if not key.startswith('@'):
|
||||
keys.append(key)
|
||||
if key.startswith('@'):
|
||||
continue
|
||||
|
||||
# Find the FIRST occurrence of this key as a top-level definition
|
||||
# A top-level key appears at the start of a line (after one level of indentation)
|
||||
# with the pattern: "keyName": (not nested inside another object)
|
||||
for line_num, line in enumerate(lines, start=1):
|
||||
# Match key at the beginning of a line (indentation level 1)
|
||||
# This ensures we're matching top-level keys, not nested ones
|
||||
if re.match(r'^ "' + re.escape(key) + r'":\s*', line):
|
||||
# Only include keys that appear after the specified line
|
||||
if line_num > start_line:
|
||||
keys.append(key)
|
||||
break
|
||||
|
||||
return keys
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue