From d104edd65c5b93157e95cbbd5f0dc00de8f39767 Mon Sep 17 00:00:00 2001 From: Winston Lowe Date: Sun, 10 May 2026 16:04:39 -0700 Subject: [PATCH] added Groq and openAI APIs --- tools/translate.py | 1348 ++++++++++++++++++++++++++++++-------------- 1 file changed, 928 insertions(+), 420 deletions(-) diff --git a/tools/translate.py b/tools/translate.py index 905d435e..aa3fd82c 100644 --- a/tools/translate.py +++ b/tools/translate.py @@ -1,25 +1,58 @@ #!/usr/bin/env python3 """ -translate_arb_with_translategemma.py +translate_arb_with_ollama.py -Translates ARB/JSON localization files using TranslateGemma via Ollama. -Preserves placeholders like {deviceName} and ICU plural/select formats. +Translates ARB/JSON localization values using an Ollama, Groq, or OpenAI-compatible model, while: +- preserving keys +- skipping "@@locale" and all "@key" metadata blocks +- preserving placeholders like {deviceName}, {count, plural, ...} +- writing a new file with updated @@locale +- printing progress as it runs Usage: # Translate all strings: - python translate.py --in lib/l10n/app_en.arb --out lib/l10n/app_es.arb --to-locale es + python translate.py \ + --in ../lib/l10n/app_en.arb \ + --out ../lib/l10n/app_es.arb \ + --to-locale es \ + --model ministral-3:latest \ + --temperature 0 \ + --concurrency 4 - # Translate only missing strings: - python translate.py --in lib/l10n/app_en.arb --out lib/l10n/app_es.arb --to-locale es --missing-only + # Translate only missing/empty strings: + python translate.py \ + --in ../lib/l10n/app_en.arb \ + --out ../lib/l10n/app_es.arb \ + --to-locale es \ + --missing-only \ + --model ministral-3:latest # Translate all locales (missing strings only): - python translate.py --in lib/l10n/app_en.arb --l10n-dir lib/l10n --missing-only + python translate.py \ + --in ../lib/l10n/app_en.arb \ + --l10n-dir ../lib/l10n \ + --missing-only \ + --model ministral-3:latest - # New locales copied from app_en.arb still match English → --missing-only skips them.
- # Translate every key that still equals the template (e.g. hu, ja, ko): - python translate.py --in lib/l10n/app_en.arb --l10n-dir lib/l10n --copy-of-template --only-locales hu,ja,ko + # Translate using Groq (very fast): + python translate.py \ + --in ../lib/l10n/app_en.arb \ + --l10n-dir ../lib/l10n \ + --missing-only \ + --backend groq --model llama-3.3-70b-versatile \ + --temperature 0.1 --concurrency 12 + + # Translate using local OpenAI-compatible server (LM Studio/llama.cpp/vLLM): + python translate.py \ + --in ../lib/l10n/app_en.arb \ + --l10n-dir ../lib/l10n \ + --missing-only \ + --backend openai \ + --openai-base-url http://localhost:1234/v1 \ + --model local-model-name """ +from __future__ import annotations import argparse import json import os @@ -31,9 +64,21 @@ from dataclasses import dataclass from typing import Any, Dict, List, Tuple, Optional from urllib import request +try: + from groq import Groq + GROQ_AVAILABLE = True +except ImportError: + GROQ_AVAILABLE = False -# Placeholder patterns +try: + from openai import OpenAI + OPENAI_AVAILABLE = True +except ImportError: + OPENAI_AVAILABLE = False + +# Simple placeholder like {name}, {count}, {deviceName} SIMPLE_PLACEHOLDER_RE = re.compile(r"\{(\w+)\}") +# ICU plural/select variable name extraction: {count, plural, ...} or {gender, select, ...} ICU_VAR_RE = re.compile(r"\{(\w+)\s*,\s*(?:plural|select|selectordinal)\s*,", re.IGNORECASE) @@ -43,47 +88,356 @@ class OllamaConfig: model: str timeout_s: float temperature: float + num_ctx: int + num_predict: int + top_p: float + +@dataclass +class GroqConfig: + client: Groq + model: str + temperature: float + max_tokens: int # Groq calls it max_tokens (not num_predict) + top_p: float -# Language mapping (locale_code -> (language_name, translategemma_code)) -LOCALE_MAP = { - "es": ("Spanish", "es"), - "fr": ("French", "fr"), - "de": ("German", "de"), - "it": ("Italian", "it"), - "pt": ("Portuguese", "pt"), - "pt-BR": ("Brazilian Portuguese", "pt"), - "ja":
("Japanese", "ja"), - "ko": ("Korean", "ko"), - "zh": ("Chinese", "zh-Hans"), - "zh-Hant": ("Chinese", "zh-Hant"), - "ru": ("Russian", "ru"), - "uk": ("Ukrainian", "uk"), - "ar": ("Arabic", "ar"), - "hi": ("Hindi", "hi"), - "tr": ("Turkish", "tr"), - "nl": ("Dutch", "nl"), - "sv": ("Swedish", "sv"), - "no": ("Norwegian", "no"), - "da": ("Danish", "da"), - "fi": ("Finnish", "fi"), - "pl": ("Polish", "pl"), - "cs": ("Czech", "cs"), - "sk": ("Slovak", "sk"), - "sl": ("Slovenian", "sl"), - "bg": ("Bulgarian", "bg"), - "hu": ("Hungarian", "hu"), - "el": ("Greek", "el"), - "he": ("Hebrew", "he"), - "th": ("Thai", "th"), - "vi": ("Vietnamese", "vi"), - "id": ("Indonesian", "id"), +@dataclass +class OpenAIConfig: + client: OpenAI + model: str + temperature: float + max_tokens: int + top_p: float + +def http_post_json(url: str, payload: Dict[str, Any], timeout_s: float) -> Dict[str, Any]: + data = json.dumps(payload).encode("utf-8") + req = request.Request( + url, + data=data, + headers={"Content-Type": "application/json"}, + method="POST", + ) + with request.urlopen(req, timeout=timeout_s) as resp: + body = resp.read().decode("utf-8") + return json.loads(body) + + +def strip_markdown(s: str) -> str: + """Remove common markdown emphasis markers from model output without touching snake_case words or {snake_case} placeholders.""" + # Remove bold/italic markers; underscore forms only count at word boundaries so {user_name} stays intact + s = re.sub(r'\*\*(.+?)\*\*', r'\1', s) + s = re.sub(r'\*(.+?)\*', r'\1', s) + s = re.sub(r'(?<!\w)__(.+?)__(?!\w)', r'\1', s) + s = re.sub(r'(?<!\w)_(.+?)_(?!\w)', r'\1', s) + # Remove stray asterisks + s = re.sub(r'\*+', '', s) + return s.strip() + + +def ollama_generate(cfg: OllamaConfig, prompt: str) -> str: + url = cfg.host.rstrip("/") + "/api/generate" + payload = { + "model": cfg.model, + "prompt": prompt, + "stream": False, + "options": { + "temperature": cfg.temperature, + "num_ctx": cfg.num_ctx, + "num_predict": cfg.num_predict, + "top_p": cfg.top_p, + }, + } + resp = http_post_json(url, payload, cfg.timeout_s) + out = resp.get("response", "") + # Clean up common LLM artifacts + out = strip_markdown(out) +
return out.strip() + +def groq_generate(cfg: GroqConfig, prompt: str) -> str: + try: + response = cfg.client.chat.completions.create( + model=cfg.model, + messages=[ + {"role": "user", "content": prompt}, + ], + temperature=cfg.temperature, + max_tokens=cfg.max_tokens, + top_p=cfg.top_p, + stream=False, + ) + out = response.choices[0].message.content or "" + out = strip_markdown(out) + return out.strip() + + except Exception as e: + raise RuntimeError(f"Groq API error: {str(e)}") from e + + +def openai_generate(cfg: OpenAIConfig, prompt: str) -> str: + def _call_openai(prompt_text: str, no_thinking: bool = False): + kwargs = { + "model": cfg.model, + "messages": [ + {"role": "user", "content": prompt_text}, + ], + "temperature": cfg.temperature, + "max_tokens": cfg.max_tokens, + "top_p": cfg.top_p, + "stream": False, + } + # Local OpenAI-compatible servers (vLLM/llama.cpp/LM Studio) may support this. + if no_thinking: + kwargs["extra_body"] = {"chat_template_kwargs": {"enable_thinking": False}} + return cfg.client.chat.completions.create(**kwargs) + + try: + response = _call_openai(prompt) + try: + print(f"[openai-debug] response json:\n{response.model_dump_json(indent=2)}", file=sys.stderr) + except Exception: + print(f"[openai-debug] response object: {response}", file=sys.stderr) + + choice = response.choices[0] + message = choice.message + out = (message.content or "").strip() + + # Some reasoning models put everything in reasoning_content and leave content empty. + if not out and getattr(message, "reasoning_content", None): + print( + "[openai-debug] Empty content with reasoning_content detected; retrying with no-thinking hint.", + file=sys.stderr, + ) + force_final_prompt = ( + prompt + + "\n\nFINAL INSTRUCTION: Output ONLY the final translated string. " + "No analysis. No reasoning. No extra lines." 
+ ) + second_response = _call_openai(force_final_prompt, no_thinking=True) + try: + print( + f"[openai-debug] second response json:\n{second_response.model_dump_json(indent=2)}", + file=sys.stderr, + ) + except Exception: + print(f"[openai-debug] second response object: {second_response}", file=sys.stderr) + out = (second_response.choices[0].message.content or "").strip() + + if not out: + raise RuntimeError("OpenAI response content is empty") + + out = strip_markdown(out) + return out.strip() + except Exception as e: + raise RuntimeError(f"OpenAI API error: {str(e)}") from e + + +def extract_placeholder_names(s: str) -> List[str]: + """Extract placeholder variable names (not the full braced expression). + + For '{name}' returns ['name'] + For '{count} {count, plural, =1{hop} other{hops}}' returns ['count'] + """ + names = set() + # Get ICU variable names first + for m in ICU_VAR_RE.finditer(s): + names.add(m.group(1)) + # Get simple placeholders, but skip if they're inside ICU blocks (text forms like {hop}) + # We do this by checking if the name is also an ICU variable - if not, it's a simple placeholder + # unless it looks like a word (ICU text forms are usually short words) + for m in SIMPLE_PLACEHOLDER_RE.finditer(s): + name = m.group(1) + # Check if this appears as a simple {name} placeholder (not inside ICU) + # by looking at what comes after it + full_match = m.group(0) + pos = m.start() + # Look for pattern like {name, plural/select - if found, skip (handled by ICU_VAR_RE) + rest = s[pos:] + if re.match(r"\{\w+\s*,\s*(?:plural|select|selectordinal)", rest, re.IGNORECASE): + continue + # Text forms (preceded by =X{ or other{) can only exist when the string has an ICU block; otherwise "Select one {item}" would lose {item} + before = s[:pos] + if ICU_VAR_RE.search(s) and re.search(r"(?:=\d+|zero|one|two|few|many|other)\s*$", before, re.IGNORECASE): + continue # This is a text form like "=1{hop}", skip it + names.add(name) + return sorted(names) + + +def build_prompt(text: str, target_lang: str, placeholder_names: List[str], has_icu: bool,
ask_confidence: bool = False) -> str: + preserve_list = "\n".join(f"- {{{t}}}" for t in placeholder_names) if placeholder_names else "- (none)" + + icu_note = "" + if has_icu: + icu_note = ( + "ICU FORMAT RULES:\n" + f"- This text uses ICU plural/select format: {{var, plural, =1{{singular}} other{{plural}}}}\n" + "- Keep structure keywords EXACTLY: plural, select, =0, =1, =2, zero, one, two, few, many, other\n" + f"- TRANSLATE the words inside each form to {target_lang}\n" + "- Example: =1{item} other{items} -> translate 'item'/'items' but keep =1{{ }} other{{ }} structure\n\n" + ) + + if ask_confidence: + return ( + f"Translate this UI string to {target_lang}.\n\n" + "RULES:\n" + "- Placeholders like {name}, {count} must appear EXACTLY unchanged.\n" + "- Use infinitive verb forms for buttons (Save, Delete, etc.).\n" + f"- Use natural {target_lang} word order.\n" + "- Keep brand names and technical terms unchanged.\n\n" + f"{icu_note}" + f"Placeholders: {', '.join(f'{{{t}}}' for t in placeholder_names) if placeholder_names else 'none'}\n\n" + f"English: {text}\n\n" + "Respond with EXACTLY two lines:\n" + "1. The translation (no quotes)\n" + "2. Confidence score 1-5 (5=certain, 1=unsure)\n\n" + "Example response:\n" + "Guardar archivo\n" + "5" + ) + else: + return ( + f"Translate this UI string to {target_lang}. Return ONLY the translation.\n\n" + "RULES:\n" + "- Output the translated text ONLY. 
No markdown, no quotes, no explanations.\n" + "- Placeholders like {name}, {count} must appear EXACTLY unchanged.\n" + "- Use infinitive verb forms for buttons (Save, Delete, etc.).\n" + f"- Use natural {target_lang} word order.\n" + "- Keep brand names and technical terms unchanged.\n" + "- Translation length should be similar to the original.\n\n" + f"{icu_note}" + f"Placeholders: {', '.join(f'{{{t}}}' for t in placeholder_names) if placeholder_names else 'none'}\n\n" + f"English: {text}\n" + f"{target_lang}:" + ) + + +def parse_confidence_response(response: str) -> Tuple[str, int]: + """Parse response with translation and confidence score. + + Returns (translation, confidence) where confidence is 1-5, or 0 if unparseable. + """ + lines = response.strip().split('\n') + if len(lines) >= 2: + translation = '\n'.join(lines[:-1]).strip() # All but last line + try: + # Try to extract number from last line + last_line = lines[-1].strip() + # Handle formats like "5", "5/5", "Confidence: 5" + match = re.search(r'\b([1-5])\b', last_line) + if match: + confidence = int(match.group(1)) + return translation, confidence + except ValueError: + pass + # Fallback: treat whole response as translation with unknown confidence + return strip_markdown(response), 0 + + +def looks_like_translation_failed(src: str, out: str) -> bool: + if not out: + return True + if src.strip() == out.strip() and len(src.strip()) > 8: + return True + # Detect hallucination: output much longer than input (3x+ for short strings, 2x for longer) + src_len = len(src.strip()) + out_len = len(out.strip()) + if src_len < 50 and out_len > src_len * 3: + return True + if src_len >= 50 and out_len > src_len * 2: + return True + return False + + +def has_icu_block(s: str) -> bool: + """Check if string contains ICU plural/select block.""" + return bool(ICU_VAR_RE.search(s)) + + +def validate_preserved_tokens(src: str, out: str) -> Tuple[bool, Optional[str]]: + """Validate that placeholder names are preserved in 
translation.""" + src_names = extract_placeholder_names(src) + + # Check each placeholder name appears in output + for name in src_names: + # Look for {name} or {name, plural/select...} + pattern = r"\{" + re.escape(name) + r"(?:\}|\s*,)" + if not re.search(pattern, out): + return False, f"Missing placeholder: {{{name}}}" + + # If source has ICU block, output should too + if has_icu_block(src) and not has_icu_block(out): + return False, "ICU plural/select block missing in output" + + return True, None + + +def compute_confidence(src: str, out: str) -> Tuple[float, List[str]]: + """ + Compute confidence score (0.0 to 1.0) for a translation. + Returns (score, list of issues). + """ + issues = [] + score = 1.0 + + src_len = len(src.strip()) + out_len = len(out.strip()) + + # Length ratio check + if src_len > 0: + ratio = out_len / src_len + if ratio < 0.3: # Way too short + score -= 0.4 + issues.append("too_short") + elif ratio < 0.5: + score -= 0.2 + issues.append("short") + elif ratio > 2.5: # Way too long + score -= 0.4 + issues.append("too_long") + elif ratio > 1.8: + score -= 0.2 + issues.append("long") + + # Contains question mark when source doesn't (model asking questions) + if '?' in out and '?' 
not in src: + score -= 0.3 + issues.append("added_question") + + # Contains common LLM artifacts + artifacts = ['```', '**', 'translation:', 'here is', 'certainly', 'i can', 'i\'ll'] + out_lower = out.lower() + for artifact in artifacts: + if artifact in out_lower: + score -= 0.3 + issues.append(f"artifact:{artifact}") + break + + # Output looks like it's in English still (common words) + english_indicators = ['the ', ' is ', ' are ', ' was ', ' were ', ' have ', ' has ', 'you ', ' your '] + english_count = sum(1 for ind in english_indicators if ind in out_lower) + if english_count >= 3 and src_len > 20: + score -= 0.3 + issues.append("likely_english") + + # Contains newlines when source doesn't + if '\n' in out and '\n' not in src: + score -= 0.2 + issues.append("added_newlines") + + # ICU/placeholder validation + ok, _ = validate_preserved_tokens(src, out) + if not ok: + score -= 0.3 + issues.append("placeholder_error") + + return max(0.0, score), issues + + +# Keys to skip translation (brand names) +SKIP_KEYS = { + "appTitle", } -# Keys to skip translation -SKIP_KEYS = {"appTitle"} - -# Manual translations for complex strings +# Manual translations for problematic strings (key -> {locale: translation}) MANUAL_TRANSLATIONS: Dict[str, Dict[str, str]] = { "repeater_daysHoursMinsSecs": { "es": "{days} días {hours}h {minutes}m {seconds}s", @@ -106,126 +460,100 @@ MANUAL_TRANSLATIONS: Dict[str, Dict[str, str]] = { } -def http_post_json(url: str, payload: Dict[str, Any], timeout_s: float) -> Dict[str, Any]: - data = json.dumps(payload).encode("utf-8") - req = request.Request(url, data=data, headers={"Content-Type": "application/json"}, method="POST") - with request.urlopen(req, timeout=timeout_s) as resp: - return json.loads(resp.read().decode("utf-8")) - - -def ollama_generate(cfg: OllamaConfig, prompt: str) -> str: - url = cfg.host.rstrip("/") + "/api/generate" - payload = { - "model": cfg.model, - "prompt": prompt, - "stream": False, - "options": {"temperature": 
cfg.temperature}, - } - resp = http_post_json(url, payload, cfg.timeout_s) - return resp.get("response", "").strip() - - -def extract_placeholder_names(s: str) -> List[str]: - """Extract placeholder variable names from string.""" - names = set() - - # Get ICU variable names - for m in ICU_VAR_RE.finditer(s): - names.add(m.group(1)) - - # Get simple placeholders (excluding ICU text forms) - for m in SIMPLE_PLACEHOLDER_RE.finditer(s): - name = m.group(1) - pos = m.start() - rest = s[pos:] - - # Skip if this is part of an ICU block - if re.match(r"\{\w+\s*,\s*(?:plural|select|selectordinal)", rest, re.IGNORECASE): - continue - - # Skip if this is a text form inside ICU (preceded by =X{ or other{) - before = s[:pos] - if re.search(r"(?:=\d+|zero|one|two|few|many|other)\s*$", before, re.IGNORECASE): - continue - - names.add(name) - - return sorted(names) - - -def has_icu_block(s: str) -> bool: - """Check if string contains ICU plural/select block.""" - return bool(ICU_VAR_RE.search(s)) - - -def build_prompt(text: str, target_lang: str, target_code: str, placeholder_names: List[str], has_icu: bool) -> str: - """Build TranslateGemma-compatible prompt with placeholder preservation instructions.""" - # Build instructions for placeholder preservation - instructions = [] - if placeholder_names: - placeholders = ', '.join(f'{{{t}}}' for t in placeholder_names) - instructions.append(f"CRITICAL: Keep these placeholders EXACTLY as they appear: {placeholders}") - if has_icu: - instructions.append("CRITICAL: Preserve ICU message format structure (plural, select, =0, =1, other, etc.). 
Only translate the text inside the forms.") - - # Add instructions to the system prompt, not to the text itself - instruction_text = "\n".join(instructions) if instructions else "" - separator = "\n" if instruction_text else "" - - # TranslateGemma expects this exact format (note the two blank lines before text) - return f"""You are a professional English (en) to {target_lang} ({target_code}) translator. Your goal is to accurately convey the meaning and nuances of the original English text while adhering to {target_lang} grammar, vocabulary, and cultural sensitivities. -Produce only the {target_lang} translation, without any additional explanations or commentary.{separator}{instruction_text} -Please translate the following English text into {target_lang}: - - -{text}""" - - -def validate_preserved_tokens(src: str, out: str) -> Tuple[bool, Optional[str]]: - """Validate that placeholder names are preserved.""" - src_names = extract_placeholder_names(src) - - for name in src_names: - pattern = r"\{" + re.escape(name) + r"(?:\}|\s*,)" - if not re.search(pattern, out): - return False, f"Missing placeholder: {{{name}}}" - - if has_icu_block(src) and not has_icu_block(out): - return False, "ICU plural/select block missing" - - return True, None +def is_translatable_entry(key: str, value: Any) -> bool: + if key == "@@locale": + return False + if key in SKIP_KEYS: + return False + if key.startswith("@"): + return False + if not isinstance(value, str): + return False + if value.strip() == "": + return False + return True def translate_one( key: str, text: str, target_lang: str, - target_code: str, - cfg: OllamaConfig, + generate_fn, # callable(config, prompt) -> str for the primary backend + config, # OllamaConfig, GroqConfig or OpenAIConfig retries: int, backoff_s: float, - fallback_cfg: Optional[OllamaConfig] = None, + fallback_generate_fn=None, # callable used with fallback_config; generate_fn is used when None + fallback_config=None, + confidence_threshold: float = 0.7, + model_confidence_threshold: int = 4, + ask_model_confidence: bool = True, ) ->
Tuple[str, str, Optional[str], bool]: - """Translate a single string. Returns (key, translated_text, error_or_none, used_fallback).""" + """ + Translate a single string. + Returns (key, translated_text, error_or_none, used_fallback_model). + """ placeholder_names = extract_placeholder_names(text) text_has_icu = has_icu_block(text) - prompt = build_prompt(text, target_lang, target_code, placeholder_names, text_has_icu) + + # Ask for confidence if we have a fallback model + should_ask_confidence = ask_model_confidence and fallback_config and fallback_config.model != config.model + prompt = build_prompt(text, target_lang, placeholder_names, text_has_icu, ask_confidence=should_ask_confidence) + used_fallback = False last_err: Optional[str] = None for attempt in range(retries + 1): try: - out = ollama_generate(cfg, prompt) - - # Validate placeholders + raw_out = generate_fn(config, prompt) + + # Parse confidence if we asked for it + if should_ask_confidence: + out, model_confidence = parse_confidence_response(raw_out) + else: + out = raw_out + model_confidence = 5 # Assume high confidence if not asked + ok, why = validate_preserved_tokens(text, out) if not ok: last_err = f"Validation failed: {why}" - if attempt < retries: - time.sleep(backoff_s * (attempt + 1)) - continue + # Retry without confidence format for simpler response + prompt = build_prompt(text, target_lang, placeholder_names, text_has_icu, ask_confidence=False) + prompt = ( + prompt + + "\n\nIMPORTANT: You MUST keep every {...} segment exactly unchanged. " + "If you cannot, return the original text unchanged."
+ ) raise ValueError(last_err) - return key, out, None, False + if looks_like_translation_failed(text, out) and attempt < retries: + last_err = "Output identical/suspicious; retrying" + time.sleep(backoff_s * (attempt + 1)) + continue + + # Check if model reported low confidence - use fallback + if model_confidence > 0 and model_confidence < model_confidence_threshold and fallback_config: + fallback_prompt = build_prompt(text, target_lang, placeholder_names, text_has_icu, ask_confidence=False) + fallback_out = (fallback_generate_fn or generate_fn)(fallback_config, fallback_prompt) + fallback_ok, _ = validate_preserved_tokens(text, fallback_out) + if fallback_ok and not looks_like_translation_failed(text, fallback_out): + return key, fallback_out, None, True + + # Also check computed confidence and use fallback model if needed + confidence, issues = compute_confidence(text, out) + if confidence < confidence_threshold and fallback_config and fallback_config.model != config.model: + # Low confidence - try with bigger model + fallback_prompt = build_prompt(text, target_lang, placeholder_names, text_has_icu) + fallback_out = (fallback_generate_fn or generate_fn)(fallback_config, fallback_prompt) + fallback_ok, _ = validate_preserved_tokens(text, fallback_out) + fallback_conf, _ = compute_confidence(text, fallback_out) + + if fallback_ok and fallback_conf > confidence: + # Fallback is better + return key, fallback_out, None, True + elif fallback_ok and not ok: + # Original failed validation but fallback passed + return key, fallback_out, None, True + + return key, out, None, used_fallback except Exception as e: last_err = str(e) @@ -233,74 +561,21 @@ def translate_one( time.sleep(backoff_s * (attempt + 1)) continue - # Try fallback model if available - if fallback_cfg: + # Last resort: try fallback model + if fallback_config and fallback_config.model != config.model: try: - fallback_prompt = build_prompt(text, target_lang, target_code, placeholder_names, text_has_icu) - fallback_out = ollama_generate(fallback_cfg,
fallback_prompt) + fallback_prompt = build_prompt(text, target_lang, placeholder_names, text_has_icu) + fallback_out = (fallback_generate_fn or generate_fn)(fallback_config, fallback_prompt) fallback_ok, _ = validate_preserved_tokens(text, fallback_out) - if fallback_ok: + if fallback_ok and not looks_like_translation_failed(text, fallback_out): return key, fallback_out, None, True except Exception: pass - # Fallback to original - return key, text, last_err, False - - -def is_translatable_entry(key: str, value: Any) -> bool: - """Check if an entry should be translated.""" - if key == "@@locale" or key.startswith("@") or key in SKIP_KEYS: - return False - return isinstance(value, str) and value.strip() != "" - - -def find_missing_keys(source_data: Dict[str, Any], target_data: Dict[str, Any]) -> List[str]: - """Find keys that are missing or empty in target.""" - missing = [] - for key in source_data: - if key == "@@locale" or key.startswith("@"): - continue - if key not in target_data or (isinstance(target_data.get(key), str) and target_data[key].strip() == ""): - missing.append(key) - return missing - - -def find_keys_still_template_copy(source_data: Dict[str, Any], target_data: Dict[str, Any]) -> List[str]: - """Keys whose value is still exactly the same as the template (typical after cp app_en.arb → app_xx.arb).""" - out: List[str] = [] - for key in source_data: - if key == "@@locale" or key.startswith("@"): - continue - src = source_data.get(key) - if not is_translatable_entry(key, src): - continue - if not isinstance(src, str): - continue - tgt = target_data.get(key) - if not isinstance(tgt, str) or tgt.strip() == "": - out.append(key) - elif tgt == src: - out.append(key) - return out - - -def get_all_locale_files(l10n_dir: str, template_file: str) -> List[Tuple[str, str]]: - """Find all locale .arb files excluding template.
Returns [(locale_code, file_path)].""" - locales = [] - template_basename = os.path.basename(template_file) - - for filename in os.listdir(l10n_dir): - if filename.endswith('.arb') and filename != template_basename: - if filename.startswith('app_'): - locale = filename[4:-4] # app_es.arb -> es - locales.append((locale, os.path.join(l10n_dir, filename))) - - return sorted(locales) + return key, text, last_err, False # fallback to original on failure def fmt_duration(seconds: float) -> str: - """Format duration as human-readable string.""" if seconds < 60: return f"{seconds:.1f}s" m = int(seconds // 60) @@ -312,53 +587,330 @@ def fmt_duration(seconds: float) -> str: return f"{h}h {m2}m" +def find_missing_keys(source_data: Dict[str, Any], target_data: Dict[str, Any]) -> List[str]: + """Find keys that are in source but not in target, or have empty values (excluding metadata keys).""" + missing = [] + for key in source_data: + if key == "@@locale": + continue + if key.startswith("@"): + continue + if key not in target_data: + missing.append(key) + elif isinstance(target_data.get(key), str) and target_data[key].strip() == "": + # Also include keys with empty string values + missing.append(key) + return missing + + +def get_all_locale_files(l10n_dir: str, template_file: str) -> List[Tuple[str, str]]: + """Find all locale .arb files in the directory, excluding the template. + + Returns list of (locale_code, file_path) tuples. 
+ """ + locales = [] + template_basename = os.path.basename(template_file) + + for filename in os.listdir(l10n_dir): + if not filename.endswith('.arb'): + continue + if filename == template_basename: + continue + # Extract locale from filename like app_es.arb -> es + if filename.startswith('app_') and filename.endswith('.arb'): + locale = filename[4:-4] # Remove 'app_' prefix and '.arb' suffix + filepath = os.path.join(l10n_dir, filename) + locales.append((locale, filepath)) + + return sorted(locales) + + +def main() -> int: + ap = argparse.ArgumentParser() + ap.add_argument("--in", dest="in_path", required=True, help="Input .arb/.json file path (source/template)") + ap.add_argument("--out", dest="out_path", default=None, help="Output .arb/.json file path (required unless using --l10n-dir)") + ap.add_argument("--to-locale", default=None, help="Target locale code, e.g. es, fr, de (required unless using --l10n-dir)") + ap.add_argument("--l10n-dir", default=None, help="Directory containing locale .arb files. When set, translates all locales.") + ap.add_argument("--missing-only", action="store_true", help="Only translate keys missing from target file") + ap.add_argument("--target-lang", default=None, help="Target language name for the model, e.g. 
Spanish (defaults from locale)") + ap.add_argument("--model", default="gemma3:4b", help="Model name for selected backend") + ap.add_argument("--backend", choices=["ollama", "groq", "openai"], default="ollama", + help="Inference backend to use") + ap.add_argument("--groq-api-key", default=None, + help="Groq API key (can also be set via GROQ_API_KEY env var)") + ap.add_argument("--openai-api-key", default=None, + help="OpenAI API key (for local servers this can often be omitted)") + ap.add_argument("--openai-base-url", default="http://localhost:1234/v1", + help="OpenAI-compatible base URL for local LLM server") + ap.add_argument("--fallback-model", default=None, help="Larger model to use for low-confidence translations") + ap.add_argument("--confidence-threshold", type=float, default=0.7, help="Computed confidence threshold to trigger fallback (0.0-1.0)") + ap.add_argument("--model-confidence-threshold", type=int, default=4, help="Model self-reported confidence threshold (1-5, use fallback if below)") + ap.add_argument("--retry-model", default=None, help="Model to use for end-of-run retries") + ap.add_argument("--host", default="http://localhost:11434", help="Ollama host") + ap.add_argument("--timeout", type=float, default=120.0, help="HTTP timeout seconds") + ap.add_argument("--temperature", type=float, default=0.2, help="Model temperature") + ap.add_argument("--num-ctx", type=int, default=4096, help="Context size") + ap.add_argument("--num-predict", type=int, default=256, help="Max tokens to generate") + ap.add_argument("--top-p", type=float, default=0.9, help="Top-p") + ap.add_argument("--concurrency", type=int, default=4, help="Parallel requests") + ap.add_argument("--retries", type=int, default=2, help="Retries per string") + ap.add_argument("--backoff", type=float, default=0.6, help="Backoff seconds base") + ap.add_argument("--dry-run", action="store_true", help="Do not write file; just print summary") + ap.add_argument("--progress-every", type=int, default=1, 
help="Print progress every N completed strings (default: 1)") + args = ap.parse_args() + + locale_map = { + "es": "Spanish", + "fr": "French", + "de": "German", + "it": "Italian", + "pt": "Portuguese", + "pt-BR": "Brazilian Portuguese", + "ja": "Japanese", + "ko": "Korean", + "zh": "Chinese (Simplified)", + "zh-Hant": "Chinese (Traditional)", + "ru": "Russian", + "uk": "Ukrainian", + "ar": "Arabic", + "hi": "Hindi", + "tr": "Turkish", + "nl": "Dutch", + "sv": "Swedish", + "no": "Norwegian", + "da": "Danish", + "fi": "Finnish", + "pl": "Polish", + "cs": "Czech", + "sk": "Slovak", + "sl": "Slovenian", + "bg": "Bulgarian", + "el": "Greek", + "he": "Hebrew", + "th": "Thai", + "vi": "Vietnamese", + "id": "Indonesian", + } + + # Read source/template file + try: + with open(args.in_path, "r", encoding="utf-8") as f: + source_data = json.load(f) + except Exception as e: + print(f"Failed to read input: {e}", file=sys.stderr) + return 2 + + if not isinstance(source_data, dict): + print("Input JSON must be an object at top-level.", file=sys.stderr) + return 2 + + # If --l10n-dir is provided, process all locale files + if args.l10n_dir: + locales = get_all_locale_files(args.l10n_dir, args.in_path) + if not locales: + print(f"No locale files found in {args.l10n_dir}", file=sys.stderr) + return 1 + + print(f"Found {len(locales)} locale file(s) to process") + + total_translated = 0 + for locale_code, locale_path in locales: + target_lang = locale_map.get(locale_code, locale_code) + + # Read existing target file + try: + with open(locale_path, "r", encoding="utf-8") as f: + target_data = json.load(f) + except Exception as e: + print(f" [{locale_code}] Failed to read {locale_path}: {e}") + continue + + if args.missing_only: + missing_keys = find_missing_keys(source_data, target_data) + if not missing_keys: + print(f" [{locale_code}] No missing keys") + continue + print(f" [{locale_code}] {len(missing_keys)} missing key(s): {', '.join(missing_keys[:5])}{'...' 
if len(missing_keys) > 5 else ''}") + else: + missing_keys = None + + # Run translation for this locale + result = translate_locale( + source_data=source_data, + target_data=target_data, + target_locale=locale_code, + target_lang=target_lang, + out_path=locale_path, + args=args, + locale_map=locale_map, + missing_keys=missing_keys, + ) + total_translated += result + + print(f"\nTotal: {total_translated} string(s) translated across {len(locales)} locale(s)") + return 0 + + # Single locale mode - validate required args + if not args.out_path: + print("--out is required when not using --l10n-dir", file=sys.stderr) + return 1 + if not args.to_locale: + print("--to-locale is required when not using --l10n-dir", file=sys.stderr) + return 1 + + target_lang = args.target_lang or locale_map.get(args.to_locale, args.to_locale) + + # Read existing target file if --missing-only and file exists + target_data: Dict[str, Any] = {} + missing_keys: Optional[List[str]] = None + if args.missing_only: + if os.path.exists(args.out_path): + try: + with open(args.out_path, "r", encoding="utf-8") as f: + target_data = json.load(f) + missing_keys = find_missing_keys(source_data, target_data) + if not missing_keys: + print(f"No missing keys in {args.out_path}") + return 0 + print(f"Found {len(missing_keys)} missing key(s) to translate") + except Exception as e: + print(f"Failed to read target file: {e}", file=sys.stderr) + return 2 + else: + print(f"Target file {args.out_path} does not exist. 
Will translate all strings.") + + result = translate_locale( + source_data=source_data, + target_data=target_data, + target_locale=args.to_locale, + target_lang=target_lang, + out_path=args.out_path, + args=args, + locale_map=locale_map, + missing_keys=missing_keys, + ) + return 0 if result >= 0 else 1 + + def translate_locale( source_data: Dict[str, Any], target_data: Dict[str, Any], target_locale: str, target_lang: str, - target_code: str, out_path: str, args, + locale_map: Dict[str, str], missing_keys: Optional[List[str]] = None, ) -> int: """Translate a single locale. Returns number of strings translated.""" - cfg = OllamaConfig( - host=args.host, - model=args.model, - timeout_s=args.timeout, - temperature=args.temperature, - ) + if args.backend == "groq": + if not GROQ_AVAILABLE: + print("Error: Groq backend requested but 'groq' package is not installed.", file=sys.stderr) + print("Run: pip install groq", file=sys.stderr) + return -1 - fallback_cfg = None - if args.fallback_model: - fallback_cfg = OllamaConfig( + api_key = args.groq_api_key or os.environ.get("GROQ_API_KEY") + if not api_key: + print("Error: --groq-api-key or GROQ_API_KEY environment variable is required", file=sys.stderr) + return -1 # negative = error sentinel; main() treats result >= 0 as success + + client = Groq(api_key=api_key) + + cfg = GroqConfig( + client=client, + model=args.model, + temperature=args.temperature, + max_tokens=args.num_predict, # reusing the same flag + top_p=args.top_p, + ) + generate_fn = groq_generate + + fallback_cfg = None + fallback_generate_fn = None + if args.fallback_model: + print("Warning: --fallback-model not yet supported with Groq backend", file=sys.stderr) + + elif args.backend == "openai": + if not OPENAI_AVAILABLE: + print("Error: OpenAI backend requested but 'openai' package is not installed.", file=sys.stderr) + print("Run: pip install openai", file=sys.stderr) + return -1 + + # Local OpenAI-compatible servers often accept any non-empty API key. 
+ api_key = args.openai_api_key or os.environ.get("OPENAI_API_KEY") or "local" + client = OpenAI(api_key=api_key, base_url=args.openai_base_url) + + cfg = OpenAIConfig( + client=client, + model=args.model, + temperature=args.temperature, + max_tokens=args.num_predict, + top_p=args.top_p, + ) + generate_fn = openai_generate + + fallback_cfg = None + fallback_generate_fn = None + if args.fallback_model: + fallback_cfg = OpenAIConfig( + client=client, + model=args.fallback_model, + temperature=args.temperature, + max_tokens=args.num_predict, + top_p=args.top_p, + ) + fallback_generate_fn = openai_generate + + else: # ollama + cfg = OllamaConfig( host=args.host, - model=args.fallback_model, + model=args.model, timeout_s=args.timeout, temperature=args.temperature, + num_ctx=args.num_ctx, + num_predict=args.num_predict, + top_p=args.top_p, ) + generate_fn = ollama_generate - # Start with target data or source data - out_data: Dict[str, Any] = dict(target_data) if target_data else dict(source_data) + fallback_cfg = None + fallback_generate_fn = None + if args.fallback_model: + fallback_cfg = OllamaConfig( + host=args.host, + model=args.fallback_model, + timeout_s=args.timeout, + temperature=args.temperature, + num_ctx=args.num_ctx, + num_predict=args.num_predict, + top_p=args.top_p, + ) + fallback_generate_fn = ollama_generate + + # Start with target data (preserves existing translations) or source data + if target_data: + out_data: Dict[str, Any] = dict(target_data) + else: + out_data: Dict[str, Any] = dict(source_data) out_data["@@locale"] = target_locale # Build list of items to translate if missing_keys is not None: + # Only translate missing keys items: List[Tuple[str, str]] = [ - (k, source_data[k]) for k in missing_keys + (k, source_data[k]) for k in missing_keys if is_translatable_entry(k, source_data.get(k)) ] - # Copy metadata for missing items + # Also copy over any metadata keys for missing items for key in missing_keys: meta_key = f"@{key}" if meta_key in 
source_data: out_data[meta_key] = source_data[meta_key] else: items: List[Tuple[str, str]] = [(k, v) for k, v in source_data.items() if is_translatable_entry(k, v)] - - # Apply manual translations + + # Apply manual translations first manual_count = 0 items_to_translate: List[Tuple[str, str]] = [] for k, v in items: @@ -367,73 +919,175 @@ def translate_locale( manual_count += 1 else: items_to_translate.append((k, v)) - + if manual_count > 0: print(f"Applied {manual_count} manual translation(s)") - + total = len(items_to_translate) + if total == 0 and manual_count == 0: + print("No translatable string entries found (excluding @@locale and @metadata).") + return 0 + if total == 0: - if manual_count > 0: - print("All strings handled by manual translations.") - return manual_count - - fallback_info = f" (fallback: {args.fallback_model})" if args.fallback_model else "" - print(f"Translating {total} strings -> {target_lang} using {cfg.model}{fallback_info} (concurrency={args.concurrency})") - + print("All strings handled by manual translations.") + else: + fallback_info = f" (fallback: {args.fallback_model})" if args.fallback_model else "" + print(f"Translating {total} strings -> {target_lang} using {cfg.model}{fallback_info} (concurrency={args.concurrency})") + start = time.time() + failures: List[Tuple[str, str]] = [] - translated_ok = manual_count + translated_ok = manual_count # Count manual translations as OK fallback_used = 0 completed = 0 - with ThreadPoolExecutor(max_workers=max(1, args.concurrency)) as ex: - future_to_key = { + # Build a lookup for original text by key + items_dict: Dict[str, str] = dict(items_to_translate) + + # Submit all tasks up front + if total > 0: + with ThreadPoolExecutor(max_workers=max(1, args.concurrency)) as ex: + future_to_key = { ex.submit( - translate_one, - key=k, - text=v, - target_lang=target_lang, - target_code=target_code, - cfg=cfg, - retries=args.retries, - backoff_s=args.backoff, - fallback_cfg=fallback_cfg, - ): k - for 
(k, v) in items_to_translate - } + translate_one, + key=k, + text=v, + target_lang=target_lang, + generate_fn=generate_fn, + config=cfg, + retries=args.retries, + backoff_s=args.backoff, + fallback_generate_fn=fallback_generate_fn, + fallback_config=fallback_cfg, + confidence_threshold=args.confidence_threshold, + model_confidence_threshold=args.model_confidence_threshold, + ask_model_confidence=bool(args.fallback_model), + ): k + for (k, v) in items_to_translate + } + print(f"Submitted {len(future_to_key)} translation tasks...") + for fut in as_completed(future_to_key): + k, translated, err, used_fallback = fut.result() + out_data[k] = translated - for fut in as_completed(future_to_key): - k, translated, err, used_fallback = fut.result() - out_data[k] = translated - - completed += 1 - if err: - failures.append((k, err)) - status = "FAIL" - else: - translated_ok += 1 - if used_fallback: - fallback_used += 1 - status = "OK*" + completed += 1 + if err: + failures.append((k, err)) + status = "FAIL" else: - status = "OK" + translated_ok += 1 + if used_fallback: + fallback_used += 1 + status = "OK*" # asterisk indicates fallback model was used + else: + status = "OK" - if completed % args.progress_every == 0 or completed == total: - elapsed = time.time() - start - rate = completed / elapsed if elapsed > 0 else 0.0 - remaining = (total - completed) / rate if rate > 0 else 0.0 - print(f"[{completed:>4}/{total}] {status:<4} {k} | elapsed {fmt_duration(elapsed)} | ETA {fmt_duration(remaining)}") + if args.progress_every > 0 and (completed % args.progress_every == 0 or completed == total): + elapsed = time.time() - start + rate = completed / elapsed if elapsed > 0 else 0.0 + remaining = (total - completed) / rate if rate > 0 else 0.0 + # Keep it single-line friendly but readable. 
+ print( + f"[{completed:>4}/{total}] {status:<4} {k} | " + f"elapsed {fmt_duration(elapsed)} | ETA {fmt_duration(remaining)}" + ) elapsed = time.time() - start - fallback_msg = f", fallback_used={fallback_used}" if fallback_used > 0 else "" - print(f"Done in {fmt_duration(elapsed)}. OK={translated_ok}{fallback_msg}, errors={len(failures)}") + fallback_msg = f", used_fallback_model={fallback_used}" if fallback_used > 0 else "" + print(f"Done in {fmt_duration(elapsed)}. OK={translated_ok}{fallback_msg}, errors={len(failures)}") # summary only: per-string loop locals are unbound when total == 0 + + # Retry failed translations at the end with increasing temperature + retry_round = 1 + max_end_retries = 3 + retry_model = args.retry_model or args.model + + while failures and retry_round <= max_end_retries: + # Increase temperature for each retry round + retry_temp = min(cfg.temperature + (0.2 * retry_round), 1.0) + print(f"\n--- Retry round {retry_round}/{max_end_retries} for {len(failures)} failed key(s) (model={retry_model}, temp={retry_temp:.1f}) ---") + retry_items = [(k, items_dict[k]) for k, _ in failures] + failures = [] + retry_completed = 0 + retry_total = len(retry_items) + retry_start = time.time() + if args.backend == "groq": + retry_cfg = GroqConfig( + client=cfg.client, + model=retry_model, + temperature=retry_temp, + max_tokens=cfg.max_tokens, + top_p=cfg.top_p, + ) + retry_generate_fn = groq_generate + elif args.backend == "openai": + retry_cfg = OpenAIConfig( + client=cfg.client, + model=retry_model, + temperature=retry_temp, + max_tokens=cfg.max_tokens, + top_p=cfg.top_p, + ) + retry_generate_fn = openai_generate + else: + retry_cfg = OllamaConfig( + host=cfg.host, + model=retry_model, + timeout_s=cfg.timeout_s, + temperature=retry_temp, + num_ctx=cfg.num_ctx, + num_predict=cfg.num_predict, + top_p=cfg.top_p, + ) + retry_generate_fn = ollama_generate + with ThreadPoolExecutor(max_workers=max(1, args.concurrency)) as ex: + future_to_key = { + ex.submit( + translate_one, + key=k, + text=v, + 
target_lang=target_lang, + config=retry_cfg, + generate_fn=retry_generate_fn, + retries=args.retries, + backoff_s=args.backoff, + ): k + for (k, v) in retry_items + } + + for fut in as_completed(future_to_key): + k, translated, err, used_fb = fut.result() + out_data[k] = translated + + retry_completed += 1 + if err: + failures.append((k, err)) + status = "FAIL" + else: + translated_ok += 1 + status = "OK" + + if args.progress_every > 0 and (retry_completed % args.progress_every == 0 or retry_completed == retry_total): + elapsed = time.time() - retry_start + rate = retry_completed / elapsed if elapsed > 0 else 0.0 + remaining = (retry_total - retry_completed) / rate if rate > 0 else 0.0 + print( + f"[{retry_completed:>4}/{retry_total}] {status:<4} {k} | " + f"elapsed {fmt_duration(elapsed)} | ETA {fmt_duration(remaining)}" + ) + + retry_elapsed = time.time() - retry_start + print(f"Retry round {retry_round} done in {fmt_duration(retry_elapsed)}. Remaining failures: {len(failures)}") + retry_round += 1 + + total_elapsed = time.time() - start + print(f"\nTotal time: {fmt_duration(total_elapsed)}. OK={translated_ok}, final fallback={len(failures)}") if failures: - print(f"{len(failures)} translation(s) kept original English:") - for k, err in failures[:20]: + print("Fallback keys (kept original English due to errors):") + for k, err in failures[:60]: print(f" - {k}: {err}") - if len(failures) > 20: - print(f" ... and {len(failures) - 20} more") + if len(failures) > 60: + print(f" ... 
and {len(failures) - 60} more") if args.dry_run: print("Dry run: not writing output file.") @@ -451,151 +1105,5 @@ def translate_locale( return translated_ok -def main() -> int: - ap = argparse.ArgumentParser(description="Translate ARB files using TranslateGemma") - ap.add_argument("--in", dest="in_path", required=True, help="Input .arb file (source/template)") - ap.add_argument("--out", dest="out_path", help="Output .arb file (required unless using --l10n-dir)") - ap.add_argument("--to-locale", help="Target locale code (es, fr, de, etc.)") - ap.add_argument("--l10n-dir", help="Directory with locale files (translates all locales)") - ap.add_argument("--missing-only", action="store_true", help="Only translate missing keys") - ap.add_argument( - "--copy-of-template", - action="store_true", - help="Only translate keys whose target text still equals app_en (use for new locales copied from English)", - ) - ap.add_argument( - "--only-locales", - help="Comma-separated locale codes to process with --l10n-dir (e.g. 
hu,ja,ko)", - ) - ap.add_argument("--model", default="translategemma:latest", help="Ollama model (translategemma:latest or specific versions)") - ap.add_argument("--fallback-model", help="Fallback model for failed translations (e.g., translategemma:27b)") - ap.add_argument("--host", default="http://localhost:11434", help="Ollama host") - ap.add_argument("--timeout", type=float, default=120.0, help="HTTP timeout seconds") - ap.add_argument("--temperature", type=float, default=0.0, help="Model temperature (0.0 for deterministic)") - ap.add_argument("--concurrency", type=int, default=4, help="Parallel requests") - ap.add_argument("--retries", type=int, default=2, help="Retries per string") - ap.add_argument("--backoff", type=float, default=0.6, help="Backoff seconds base") - ap.add_argument("--dry-run", action="store_true", help="Don't write output") - ap.add_argument("--progress-every", type=int, default=1, help="Print progress every N strings") - args = ap.parse_args() - - # Read source file - try: - with open(args.in_path, "r", encoding="utf-8") as f: - source_data = json.load(f) - except Exception as e: - print(f"Failed to read input: {e}", file=sys.stderr) - return 2 - - if not isinstance(source_data, dict): - print("Input JSON must be an object at top-level.", file=sys.stderr) - return 2 - - if args.missing_only and args.copy_of_template: - print("Use only one of --missing-only or --copy-of-template", file=sys.stderr) - return 2 - - only_locales: Optional[set] = None - if args.only_locales: - only_locales = {x.strip() for x in args.only_locales.split(",") if x.strip()} - - # Process all locales if --l10n-dir is provided - if args.l10n_dir: - locales = get_all_locale_files(args.l10n_dir, args.in_path) - if not locales: - print(f"No locale files found in {args.l10n_dir}", file=sys.stderr) - return 1 - - if only_locales is not None: - locales = [(c, p) for c, p in locales if c in only_locales] - missing = only_locales - {c for c, _ in locales} - if missing: - 
print(f"Warning: no app_*.arb for locale code(s): {', '.join(sorted(missing))}", file=sys.stderr) - - print(f"Found {len(locales)} locale file(s) to process") - - total_translated = 0 - for locale_code, locale_path in locales: - lang_name, lang_code = LOCALE_MAP.get(locale_code, (locale_code, locale_code)) - - try: - with open(locale_path, "r", encoding="utf-8") as f: - target_data = json.load(f) - except Exception as e: - print(f" [{locale_code}] Failed to read {locale_path}: {e}") - continue - - missing_keys: Optional[List[str]] - if args.copy_of_template: - missing_keys = find_keys_still_template_copy(source_data, target_data) - if not missing_keys: - print(f" [{locale_code}] No keys still matching template") - continue - print(f" [{locale_code}] {len(missing_keys)} key(s) still same as template") - elif args.missing_only: - missing_keys = find_missing_keys(source_data, target_data) - if not missing_keys: - print(f" [{locale_code}] No missing keys") - continue - print(f" [{locale_code}] {len(missing_keys)} missing key(s)") - else: - missing_keys = None - - result = translate_locale( - source_data=source_data, - target_data=target_data, - target_locale=locale_code, - target_lang=lang_name, - target_code=lang_code, - out_path=locale_path, - args=args, - missing_keys=missing_keys, - ) - total_translated += result - - print(f"\nTotal: {total_translated} string(s) translated across {len(locales)} locale(s)") - return 0 - - # Single locale mode - if not args.out_path or not args.to_locale: - print("--out and --to-locale are required when not using --l10n-dir", file=sys.stderr) - return 1 - - lang_name, lang_code = LOCALE_MAP.get(args.to_locale, (args.to_locale, args.to_locale)) - - # Read existing target file if --missing-only or --copy-of-template - target_data: Dict[str, Any] = {} - missing_keys: Optional[List[str]] = None - if (args.missing_only or args.copy_of_template) and os.path.exists(args.out_path): - try: - with open(args.out_path, "r", encoding="utf-8") as 
f: - target_data = json.load(f) - if args.copy_of_template: - missing_keys = find_keys_still_template_copy(source_data, target_data) - label = "still matching template" - else: - missing_keys = find_missing_keys(source_data, target_data) - label = "missing" - if not missing_keys: - print(f"No {label} keys in {args.out_path}") - return 0 - print(f"Found {len(missing_keys)} {label} key(s) to translate") - except Exception as e: - print(f"Failed to read target file: {e}", file=sys.stderr) - return 2 - - result = translate_locale( - source_data=source_data, - target_data=target_data, - target_locale=args.to_locale, - target_lang=lang_name, - target_code=lang_code, - out_path=args.out_path, - args=args, - missing_keys=missing_keys, - ) - return 0 if result >= 0 else 1 - - if __name__ == "__main__": - raise SystemExit(main()) + raise SystemExit(main()) \ No newline at end of file