From d104edd65c5b93157e95cbbd5f0dc00de8f39767 Mon Sep 17 00:00:00 2001
From: Winston Lowe <wel97459@gmail.com>
Date: Sun, 10 May 2026 16:04:39 -0700
Subject: [PATCH] added Groq and openAI APIs

---
 tools/translate.py | 1348 ++++++++++++++++++++++++++++++--------------
 1 file changed, 928 insertions(+), 420 deletions(-)

diff --git a/tools/translate.py b/tools/translate.py
index 905d435e..aa3fd82c 100644
--- a/tools/translate.py
+++ b/tools/translate.py
@@ -1,25 +1,58 @@
 #!/usr/bin/env python3
 """
-translate_arb_with_translategemma.py
+translate_arb_with_ollama.py
 
-Translates ARB/JSON localization files using TranslateGemma via Ollama.
-Preserves placeholders like {deviceName} and ICU plural/select formats.
+Translates ARB/JSON localization values using a local Ollama model (or the Groq / OpenAI-compatible backends), while:
+- preserving keys
+- skipping "@@locale" and all "@key" metadata blocks
+- preserving placeholders like {deviceName}, {count, plural, ...}
+- writing a new file with updated @@locale
+- printing progress as it runs
 
 Usage:
   # Translate all strings:
-  python translate.py --in lib/l10n/app_en.arb --out lib/l10n/app_es.arb --to-locale es
+  python translate.py \
+    --in ../lib/l10n/app_en.arb \
+    --out ../lib/l10n/app_es.arb \
+    --to-locale es \
+    --model ministral-3:latest \
+    --temperature 0 \
+    --concurrency 4
 
-  # Translate only missing strings:
-  python translate.py --in lib/l10n/app_en.arb --out lib/l10n/app_es.arb --to-locale es --missing-only
+  # Translate only missing/untranslated strings:
+  python translate.py \
+    --in ../lib/l10n/app_en.arb \
+    --out ../lib/l10n/app_es.arb \
+    --to-locale es \
+    --missing-only \
+    --model ministral-3:latest
 
   # Translate all locales (missing strings only):
-  python translate.py --in lib/l10n/app_en.arb --l10n-dir lib/l10n --missing-only
+  python translate.py \
+    --in ../lib/l10n/app_en.arb \
+    --l10n-dir ../lib/l10n \
+    --missing-only \
+    --model ministral-3:latest
 
-  # New locales copied from app_en.arb still match English → --missing-only skips them.
-  # Translate every key that still equals the template (e.g. hu, ja, ko):
-  python translate.py --in lib/l10n/app_en.arb --l10n-dir lib/l10n --copy-of-template --only-locales hu,ja,ko
+  # Translate using Groq (very fast):
+  python translate.py \
+    --in ../lib/l10n/app_en.arb \
+    --l10n-dir ../lib/l10n \
+    --missing-only \
+    --backend groq --model llama-3.3-70b-versatile \
+    --temperature 0.1 --concurrency 12
+
+    # Translate using local OpenAI-compatible server (LM Studio/llama.cpp/vLLM):
+    python translate.py \
+        --in ../lib/l10n/app_en.arb \
+        --l10n-dir ../lib/l10n \
+        --missing-only \
+        --backend openai \
+        --openai-base-url http://localhost:1234/v1 \
+        --model local-model-name
 """
 
+from __future__ import annotations
 import argparse
 import json
 import os
@@ -31,9 +64,21 @@ from dataclasses import dataclass
 from typing import Any, Dict, List, Tuple, Optional
 from urllib import request
 
+try:
+    from groq import Groq
+    GROQ_AVAILABLE = True
+except ImportError:
+    GROQ_AVAILABLE = False
 
-# Placeholder patterns
+try:
+    from openai import OpenAI
+    OPENAI_AVAILABLE = True
+except ImportError:
+    OPENAI_AVAILABLE = False
+
+# Simple placeholder like {name}, {count}, {deviceName}
 SIMPLE_PLACEHOLDER_RE = re.compile(r"\{(\w+)\}")
+# ICU plural/select variable name extraction: {count, plural, ...} or {gender, select, ...}
 ICU_VAR_RE = re.compile(r"\{(\w+)\s*,\s*(?:plural|select|selectordinal)\s*,", re.IGNORECASE)
 
 
@@ -43,47 +88,356 @@ class OllamaConfig:
     model: str
     timeout_s: float
     temperature: float
+    num_ctx: int
+    num_predict: int
+    top_p: float
+
+@dataclass
+class GroqConfig:
+    client: Groq             # Groq SDK client; groq_generate calls client.chat.completions.create
+    model: str               # passed as model= on every request
+    temperature: float       # passed as temperature=
+    max_tokens: int          # Groq calls it max_tokens (not num_predict)
+    top_p: float             # passed as top_p=
 
 
-# Language mapping (locale_code -> (language_name, translategemma_code))
-LOCALE_MAP = {
-    "es": ("Spanish", "es"),
-    "fr": ("French", "fr"),
-    "de": ("German", "de"),
-    "it": ("Italian", "it"),
-    "pt": ("Portuguese", "pt"),
-    "pt-BR": ("Brazilian Portuguese", "pt"),
-    "ja": ("Japanese", "ja"),
-    "ko": ("Korean", "ko"),
-    "zh": ("Chinese", "zh-Hans"),
-    "zh-Hant": ("Chinese", "zh-Hant"),
-    "ru": ("Russian", "ru"),
-    "uk": ("Ukrainian", "uk"),
-    "ar": ("Arabic", "ar"),
-    "hi": ("Hindi", "hi"),
-    "tr": ("Turkish", "tr"),
-    "nl": ("Dutch", "nl"),
-    "sv": ("Swedish", "sv"),
-    "no": ("Norwegian", "no"),
-    "da": ("Danish", "da"),
-    "fi": ("Finnish", "fi"),
-    "pl": ("Polish", "pl"),
-    "cs": ("Czech", "cs"),
-    "sk": ("Slovak", "sk"),
-    "sl": ("Slovenian", "sl"),
-    "bg": ("Bulgarian", "bg"),
-    "hu": ("Hungarian", "hu"),
-    "el": ("Greek", "el"),
-    "he": ("Hebrew", "he"),
-    "th": ("Thai", "th"),
-    "vi": ("Vietnamese", "vi"),
-    "id": ("Indonesian", "id"),
+@dataclass
+class OpenAIConfig:
+    client: OpenAI           # OpenAI SDK client; openai_generate calls client.chat.completions.create
+    model: str               # passed as "model" on every request
+    temperature: float       # passed as "temperature"
+    max_tokens: int          # passed as "max_tokens"
+    top_p: float             # passed as "top_p"
+
+def http_post_json(url: str, payload: Dict[str, Any], timeout_s: float) -> Dict[str, Any]:
+    data = json.dumps(payload).encode("utf-8")
+    req = request.Request(
+        url,
+        data=data,
+        headers={"Content-Type": "application/json"},
+        method="POST",
+    )
+    with request.urlopen(req, timeout=timeout_s) as resp:
+        body = resp.read().decode("utf-8")
+        return json.loads(body)
+
+
+def strip_markdown(s: str) -> str:
+    """Strip markdown emphasis markers (**x**, *x*, __x__, _x_) and stray asterisks; return the trimmed text."""
+    # Asterisk emphasis: bold is handled first so '**x**' is not half-consumed by the italic rule.
+    s = re.sub(r'\*\*(.+?)\*\*', r'\1', s)
+    s = re.sub(r'\*(.+?)\*', r'\1', s)
+    # Underscore emphasis must sit on non-word boundaries; otherwise snake_case text and
+    # placeholders such as {device_name} ... {node_id} would silently lose their underscores.
+    s = re.sub(r'(?<!\w)__(.+?)__(?!\w)', r'\1', s)
+    s = re.sub(r'(?<!\w)_(.+?)_(?!\w)', r'\1', s)
+    return re.sub(r'\*+', '', s).strip()
+
+
+def ollama_generate(cfg: OllamaConfig, prompt: str) -> str:
+    url = cfg.host.rstrip("/") + "/api/generate"
+    payload = {
+        "model": cfg.model,
+        "prompt": prompt,
+        "stream": False,
+        "options": {
+            "temperature": cfg.temperature,
+            "num_ctx": cfg.num_ctx,
+            "num_predict": cfg.num_predict,
+            "top_p": cfg.top_p,
+        },
+    }
+    resp = http_post_json(url, payload, cfg.timeout_s)
+    out = resp.get("response", "")
+    # Clean up common LLM artifacts
+    out = strip_markdown(out)
+    return out.strip()
+
+def groq_generate(cfg: GroqConfig, prompt: str) -> str:
+    try:
+        response = cfg.client.chat.completions.create(
+            model=cfg.model,
+            messages=[
+                {"role": "user", "content": prompt},
+            ],
+            temperature=cfg.temperature,
+            max_tokens=cfg.max_tokens,
+            top_p=cfg.top_p,
+            stream=False,
+        )
+        out = response.choices[0].message.content or ""
+        out = strip_markdown(out)
+        return out.strip()
+
+    except Exception as e:
+        raise RuntimeError(f"Groq API error: {str(e)}") from e
+
+
+def openai_generate(cfg: OpenAIConfig, prompt: str) -> str:
+    def _call_openai(prompt_text: str, no_thinking: bool = False):
+        kwargs = {
+            "model": cfg.model,
+            "messages": [
+                {"role": "user", "content": prompt_text},
+            ],
+            "temperature": cfg.temperature,
+            "max_tokens": cfg.max_tokens,
+            "top_p": cfg.top_p,
+            "stream": False,
+        }
+        # Local OpenAI-compatible servers (vLLM/llama.cpp/LM Studio) may support this.
+        if no_thinking:
+            kwargs["extra_body"] = {"chat_template_kwargs": {"enable_thinking": False}}
+        return cfg.client.chat.completions.create(**kwargs)
+
+    try:
+        response = _call_openai(prompt)
+        try:
+            print(f"[openai-debug] response json:\n{response.model_dump_json(indent=2)}", file=sys.stderr)
+        except Exception:
+            print(f"[openai-debug] response object: {response}", file=sys.stderr)
+
+        choice = response.choices[0]
+        message = choice.message
+        out = (message.content or "").strip()
+
+        # Some reasoning models put everything in reasoning_content and leave content empty.
+        if not out and getattr(message, "reasoning_content", None):
+            print(
+                "[openai-debug] Empty content with reasoning_content detected; retrying with no-thinking hint.",
+                file=sys.stderr,
+            )
+            force_final_prompt = (
+                prompt
+                + "\n\nFINAL INSTRUCTION: Output ONLY the final translated string. "
+                  "No analysis. No reasoning. No extra lines."
+            )
+            second_response = _call_openai(force_final_prompt, no_thinking=True)
+            try:
+                print(
+                    f"[openai-debug] second response json:\n{second_response.model_dump_json(indent=2)}",
+                    file=sys.stderr,
+                )
+            except Exception:
+                print(f"[openai-debug] second response object: {second_response}", file=sys.stderr)
+            out = (second_response.choices[0].message.content or "").strip()
+
+        if not out:
+            raise RuntimeError("OpenAI response content is empty")
+
+        out = strip_markdown(out)
+        return out.strip()
+    except Exception as e:
+        raise RuntimeError(f"OpenAI API error: {str(e)}") from e
+
+
+def extract_placeholder_names(s: str) -> List[str]:
+    """Extract placeholder variable names (not the full braced expression).
+
+    For '{name}' returns ['name']
+    For '{count} {count, plural, =1{hop} other{hops}}' returns ['count']
+    """
+    names = set()
+    # Get ICU variable names first
+    for m in ICU_VAR_RE.finditer(s):
+        names.add(m.group(1))
+    # ICU text forms such as =1{hop} / other{hops} can only occur when the string
+    # actually contains an ICU block, so the text-form heuristic below is limited
+    # to that case; a plain string like "Phone {number}" keeps its placeholder.
+    has_icu = bool(names)
+    for m in SIMPLE_PLACEHOLDER_RE.finditer(s):
+        name = m.group(1)
+        pos = m.start()
+        # Look for pattern like {name, plural/select - if found, skip (handled by ICU_VAR_RE)
+        rest = s[pos:]
+        if re.match(r"\{\w+\s*,\s*(?:plural|select|selectordinal)", rest, re.IGNORECASE):
+            continue
+        # A text form is preceded by =N or by a plural keyword standing as a whole
+        # word; the \b keeps ordinary words that merely end in a keyword
+        # ("Phone" -> "one", "another" -> "other") from being mistaken for one.
+        before = s[:pos]
+        if has_icu and re.search(r"(?:=\d+|\b(?:zero|one|two|few|many|other))\s*$", before, re.IGNORECASE):
+            continue  # This is a text form like "=1{hop}", skip it
+        names.add(name)
+    return sorted(names)
+
+
+def build_prompt(text: str, target_lang: str, placeholder_names: List[str], has_icu: bool, ask_confidence: bool = False) -> str:
+    preserve_list = "\n".join(f"- {{{t}}}" for t in placeholder_names) if placeholder_names else "- (none)"
+    
+    icu_note = ""
+    if has_icu:
+        icu_note = (
+            "ICU FORMAT RULES:\n"
+            f"- This text uses ICU plural/select format: {{var, plural, =1{{singular}} other{{plural}}}}\n"
+            "- Keep structure keywords EXACTLY: plural, select, =0, =1, =2, zero, one, two, few, many, other\n"
+            f"- TRANSLATE the words inside each form to {target_lang}\n"
+            "- Example: =1{item} other{items} -> translate 'item'/'items' but keep =1{{ }} other{{ }} structure\n\n"
+        )
+    
+    if ask_confidence:
+        return (
+            f"Translate this UI string to {target_lang}.\n\n"
+            "RULES:\n"
+            "- Placeholders like {name}, {count} must appear EXACTLY unchanged.\n"
+            "- Use infinitive verb forms for buttons (Save, Delete, etc.).\n"
+            f"- Use natural {target_lang} word order.\n"
+            "- Keep brand names and technical terms unchanged.\n\n"
+            f"{icu_note}"
+            f"Placeholders: {', '.join(f'{{{t}}}' for t in placeholder_names) if placeholder_names else 'none'}\n\n"
+            f"English: {text}\n\n"
+            "Respond with EXACTLY two lines:\n"
+            "1. The translation (no quotes)\n"
+            "2. Confidence score 1-5 (5=certain, 1=unsure)\n\n"
+            "Example response:\n"
+            "Guardar archivo\n"
+            "5"
+        )
+    else:
+        return (
+            f"Translate this UI string to {target_lang}. Return ONLY the translation.\n\n"
+            "RULES:\n"
+            "- Output the translated text ONLY. No markdown, no quotes, no explanations.\n"
+            "- Placeholders like {name}, {count} must appear EXACTLY unchanged.\n"
+            "- Use infinitive verb forms for buttons (Save, Delete, etc.).\n"
+            f"- Use natural {target_lang} word order.\n"
+            "- Keep brand names and technical terms unchanged.\n"
+            "- Translation length should be similar to the original.\n\n"
+            f"{icu_note}"
+            f"Placeholders: {', '.join(f'{{{t}}}' for t in placeholder_names) if placeholder_names else 'none'}\n\n"
+            f"English: {text}\n"
+            f"{target_lang}:"
+        )
+
+
+def parse_confidence_response(response: str) -> Tuple[str, int]:
+    """Parse response with translation and confidence score.
+    
+    Returns (translation, confidence) where confidence is 1-5, or 0 if unparseable.
+    """
+    lines = response.strip().split('\n')
+    if len(lines) >= 2:
+        translation = '\n'.join(lines[:-1]).strip()  # All but last line
+        try:
+            # Try to extract number from last line
+            last_line = lines[-1].strip()
+            # Handle formats like "5", "5/5", "Confidence: 5"
+            match = re.search(r'\b([1-5])\b', last_line)
+            if match:
+                confidence = int(match.group(1))
+                return translation, confidence
+        except ValueError:
+            pass
+    # Fallback: treat whole response as translation with unknown confidence
+    return strip_markdown(response), 0
+
+
+def looks_like_translation_failed(src: str, out: str) -> bool:
+    if not out:
+        return True
+    if src.strip() == out.strip() and len(src.strip()) > 8:
+        return True
+    # Detect hallucination: output much longer than input (3x+ for short strings, 2x for longer)
+    src_len = len(src.strip())
+    out_len = len(out.strip())
+    if src_len < 50 and out_len > src_len * 3:
+        return True
+    if src_len >= 50 and out_len > src_len * 2:
+        return True
+    return False
+
+
+def has_icu_block(s: str) -> bool:
+    """Check if string contains ICU plural/select block."""
+    return bool(ICU_VAR_RE.search(s))
+
+
+def validate_preserved_tokens(src: str, out: str) -> Tuple[bool, Optional[str]]:
+    """Validate that placeholder names are preserved in translation."""
+    src_names = extract_placeholder_names(src)
+    
+    # Check each placeholder name appears in output
+    for name in src_names:
+        # Look for {name} or {name, plural/select...}
+        pattern = r"\{" + re.escape(name) + r"(?:\}|\s*,)"
+        if not re.search(pattern, out):
+            return False, f"Missing placeholder: {{{name}}}"
+    
+    # If source has ICU block, output should too
+    if has_icu_block(src) and not has_icu_block(out):
+        return False, "ICU plural/select block missing in output"
+    
+    return True, None
+
+
+def compute_confidence(src: str, out: str) -> Tuple[float, List[str]]:
+    """
+    Compute confidence score (0.0 to 1.0) for a translation.
+    Returns (score, list of issues).
+    """
+    issues = []
+    score = 1.0
+    
+    src_len = len(src.strip())
+    out_len = len(out.strip())
+    
+    # Length ratio check
+    if src_len > 0:
+        ratio = out_len / src_len
+        if ratio < 0.3:  # Way too short
+            score -= 0.4
+            issues.append("too_short")
+        elif ratio < 0.5:
+            score -= 0.2
+            issues.append("short")
+        elif ratio > 2.5:  # Way too long
+            score -= 0.4
+            issues.append("too_long")
+        elif ratio > 1.8:
+            score -= 0.2
+            issues.append("long")
+    
+    # Contains question mark when source doesn't (model asking questions)
+    if '?' in out and '?' not in src:
+        score -= 0.3
+        issues.append("added_question")
+    
+    # Contains common LLM artifacts
+    artifacts = ['```', '**', 'translation:', 'here is', 'certainly', 'i can', 'i\'ll']
+    out_lower = out.lower()
+    for artifact in artifacts:
+        if artifact in out_lower:
+            score -= 0.3
+            issues.append(f"artifact:{artifact}")
+            break
+    
+    # Output looks like it's in English still (common words)
+    english_indicators = ['the ', ' is ', ' are ', ' was ', ' were ', ' have ', ' has ', 'you ', ' your ']
+    english_count = sum(1 for ind in english_indicators if ind in out_lower)
+    if english_count >= 3 and src_len > 20:
+        score -= 0.3
+        issues.append("likely_english")
+    
+    # Contains newlines when source doesn't
+    if '\n' in out and '\n' not in src:
+        score -= 0.2
+        issues.append("added_newlines")
+    
+    # ICU/placeholder validation
+    ok, _ = validate_preserved_tokens(src, out)
+    if not ok:
+        score -= 0.3
+        issues.append("placeholder_error")
+    
+    return max(0.0, score), issues
+
+
+# Keys to skip translation (brand names)
+SKIP_KEYS = {
+    "appTitle",
 }
 
-# Keys to skip translation
-SKIP_KEYS = {"appTitle"}
-
-# Manual translations for complex strings
+# Manual translations for problematic strings (key -> {locale: translation})
 MANUAL_TRANSLATIONS: Dict[str, Dict[str, str]] = {
     "repeater_daysHoursMinsSecs": {
         "es": "{days} días {hours}h {minutes}m {seconds}s",
@@ -106,126 +460,100 @@ MANUAL_TRANSLATIONS: Dict[str, Dict[str, str]] = {
 }
 
 
-def http_post_json(url: str, payload: Dict[str, Any], timeout_s: float) -> Dict[str, Any]:
-    data = json.dumps(payload).encode("utf-8")
-    req = request.Request(url, data=data, headers={"Content-Type": "application/json"}, method="POST")
-    with request.urlopen(req, timeout=timeout_s) as resp:
-        return json.loads(resp.read().decode("utf-8"))
-
-
-def ollama_generate(cfg: OllamaConfig, prompt: str) -> str:
-    url = cfg.host.rstrip("/") + "/api/generate"
-    payload = {
-        "model": cfg.model,
-        "prompt": prompt,
-        "stream": False,
-        "options": {"temperature": cfg.temperature},
-    }
-    resp = http_post_json(url, payload, cfg.timeout_s)
-    return resp.get("response", "").strip()
-
-
-def extract_placeholder_names(s: str) -> List[str]:
-    """Extract placeholder variable names from string."""
-    names = set()
-
-    # Get ICU variable names
-    for m in ICU_VAR_RE.finditer(s):
-        names.add(m.group(1))
-
-    # Get simple placeholders (excluding ICU text forms)
-    for m in SIMPLE_PLACEHOLDER_RE.finditer(s):
-        name = m.group(1)
-        pos = m.start()
-        rest = s[pos:]
-
-        # Skip if this is part of an ICU block
-        if re.match(r"\{\w+\s*,\s*(?:plural|select|selectordinal)", rest, re.IGNORECASE):
-            continue
-
-        # Skip if this is a text form inside ICU (preceded by =X{ or other{)
-        before = s[:pos]
-        if re.search(r"(?:=\d+|zero|one|two|few|many|other)\s*$", before, re.IGNORECASE):
-            continue
-
-        names.add(name)
-
-    return sorted(names)
-
-
-def has_icu_block(s: str) -> bool:
-    """Check if string contains ICU plural/select block."""
-    return bool(ICU_VAR_RE.search(s))
-
-
-def build_prompt(text: str, target_lang: str, target_code: str, placeholder_names: List[str], has_icu: bool) -> str:
-    """Build TranslateGemma-compatible prompt with placeholder preservation instructions."""
-    # Build instructions for placeholder preservation
-    instructions = []
-    if placeholder_names:
-        placeholders = ', '.join(f'{{{t}}}' for t in placeholder_names)
-        instructions.append(f"CRITICAL: Keep these placeholders EXACTLY as they appear: {placeholders}")
-    if has_icu:
-        instructions.append("CRITICAL: Preserve ICU message format structure (plural, select, =0, =1, other, etc.). Only translate the text inside the forms.")
-
-    # Add instructions to the system prompt, not to the text itself
-    instruction_text = "\n".join(instructions) if instructions else ""
-    separator = "\n" if instruction_text else ""
-
-    # TranslateGemma expects this exact format (note the two blank lines before text)
-    return f"""You are a professional English (en) to {target_lang} ({target_code}) translator. Your goal is to accurately convey the meaning and nuances of the original English text while adhering to {target_lang} grammar, vocabulary, and cultural sensitivities.
-Produce only the {target_lang} translation, without any additional explanations or commentary.{separator}{instruction_text}
-Please translate the following English text into {target_lang}:
-
-
-{text}"""
-
-
-def validate_preserved_tokens(src: str, out: str) -> Tuple[bool, Optional[str]]:
-    """Validate that placeholder names are preserved."""
-    src_names = extract_placeholder_names(src)
-
-    for name in src_names:
-        pattern = r"\{" + re.escape(name) + r"(?:\}|\s*,)"
-        if not re.search(pattern, out):
-            return False, f"Missing placeholder: {{{name}}}"
-
-    if has_icu_block(src) and not has_icu_block(out):
-        return False, "ICU plural/select block missing"
-
-    return True, None
+def is_translatable_entry(key: str, value: Any) -> bool:
+    if key == "@@locale":
+        return False
+    if key in SKIP_KEYS:
+        return False
+    if key.startswith("@"):
+        return False
+    if not isinstance(value, str):
+        return False
+    if value.strip() == "":
+        return False
+    return True
 
 
 def translate_one(
     key: str,
     text: str,
     target_lang: str,
-    target_code: str,
-    cfg: OllamaConfig,
+    generate_fn,                # callable(config, prompt) -> str for the primary backend
+    config,                     # OllamaConfig, GroqConfig or OpenAIConfig matching generate_fn
     retries: int,
     backoff_s: float,
-    fallback_cfg: Optional[OllamaConfig] = None,
+    fallback_generate_fn=None,
+    fallback_config=None,
+    confidence_threshold: float = 0.7,
+    model_confidence_threshold: int = 4,
+    ask_model_confidence: bool = True,
 ) -> Tuple[str, str, Optional[str], bool]:
-    """Translate a single string. Returns (key, translated_text, error_or_none, used_fallback)."""
+    """
+    Translate a single string with up to `retries` retries; fallback_config is queried via fallback_generate_fn (or generate_fn when that is None).
+    Returns (key, translated_text, error_or_none, used_fallback_model); when every attempt fails, translated_text is the original text.
+    """
     placeholder_names = extract_placeholder_names(text)
     text_has_icu = has_icu_block(text)
-    prompt = build_prompt(text, target_lang, target_code, placeholder_names, text_has_icu)
+    fallback_fn = fallback_generate_fn or generate_fn  # fallback_config may belong to a different backend
+    # Ask for confidence if we have a fallback model
+    should_ask_confidence = ask_model_confidence and fallback_config and fallback_config.model != config.model
+    prompt = build_prompt(text, target_lang, placeholder_names, text_has_icu, ask_confidence=should_ask_confidence)
+    used_fallback = False
 
     last_err: Optional[str] = None
     for attempt in range(retries + 1):
         try:
-            out = ollama_generate(cfg, prompt)
-
-            # Validate placeholders
+            raw_out = generate_fn(config, prompt)
+            
+            # Parse confidence if we asked for it
+            if should_ask_confidence:
+                out, model_confidence = parse_confidence_response(raw_out)
+            else:
+                out = raw_out
+                model_confidence = 5  # Assume high confidence if not asked
+            
             ok, why = validate_preserved_tokens(text, out)
             if not ok:
                 last_err = f"Validation failed: {why}"
-                if attempt < retries:
-                    time.sleep(backoff_s * (attempt + 1))
-                    continue
+                # Retry without confidence format for simpler response
+                prompt = build_prompt(text, target_lang, placeholder_names, text_has_icu, ask_confidence=False)
+                prompt = (
+                    prompt
+                    + "\n\nIMPORTANT: You MUST keep every {...} segment exactly unchanged. "
+                      "If you cannot, return the original text unchanged."
+                )
                 raise ValueError(last_err)
 
-            return key, out, None, False
+            if looks_like_translation_failed(text, out) and attempt < retries:
+                last_err = "Output identical/suspicious; retrying"
+                time.sleep(backoff_s * (attempt + 1))
+                continue
+
+            # Check if model reported low confidence - use fallback
+            if model_confidence > 0 and model_confidence < model_confidence_threshold and fallback_config:
+                fallback_prompt = build_prompt(text, target_lang, placeholder_names, text_has_icu, ask_confidence=False)
+                fallback_out = fallback_fn(fallback_config, fallback_prompt)
+                fallback_ok, _ = validate_preserved_tokens(text, fallback_out)
+                if fallback_ok and not looks_like_translation_failed(text, fallback_out):
+                    return key, fallback_out, None, True
+
+            # Also check computed confidence and use fallback model if needed
+            confidence, issues = compute_confidence(text, out)
+            if confidence < confidence_threshold and fallback_config and fallback_config.model != config.model:
+                # Low confidence - try with bigger model
+                fallback_prompt = build_prompt(text, target_lang, placeholder_names, text_has_icu)
+                fallback_out = fallback_fn(fallback_config, fallback_prompt)
+                fallback_ok, _ = validate_preserved_tokens(text, fallback_out)
+                fallback_conf, _ = compute_confidence(text, fallback_out)
+                
+                if fallback_ok and fallback_conf > confidence:
+                    # Fallback is better
+                    return key, fallback_out, None, True
+                elif fallback_ok and not ok:
+                    # Original failed validation but fallback passed
+                    return key, fallback_out, None, True
+            
+            return key, out, None, used_fallback
 
         except Exception as e:
             last_err = str(e)
@@ -233,74 +561,21 @@ def translate_one(
                 time.sleep(backoff_s * (attempt + 1))
                 continue
 
-    # Try fallback model if available
-    if fallback_cfg:
+    # Last resort: try fallback model
+    if fallback_config and fallback_config.model != config.model:
         try:
-            fallback_prompt = build_prompt(text, target_lang, target_code, placeholder_names, text_has_icu)
-            fallback_out = ollama_generate(fallback_cfg, fallback_prompt)
+            fallback_prompt = build_prompt(text, target_lang, placeholder_names, text_has_icu)
+            fallback_out = fallback_fn(fallback_config, fallback_prompt)
             fallback_ok, _ = validate_preserved_tokens(text, fallback_out)
-            if fallback_ok:
+            if fallback_ok and not looks_like_translation_failed(text, fallback_out):
                 return key, fallback_out, None, True
         except Exception:
             pass
 
-    # Fallback to original
-    return key, text, last_err, False
-
-
-def is_translatable_entry(key: str, value: Any) -> bool:
-    """Check if an entry should be translated."""
-    if key == "@@locale" or key.startswith("@") or key in SKIP_KEYS:
-        return False
-    return isinstance(value, str) and value.strip() != ""
-
-
-def find_missing_keys(source_data: Dict[str, Any], target_data: Dict[str, Any]) -> List[str]:
-    """Find keys that are missing or empty in target."""
-    missing = []
-    for key in source_data:
-        if key == "@@locale" or key.startswith("@"):
-            continue
-        if key not in target_data or (isinstance(target_data.get(key), str) and target_data[key].strip() == ""):
-            missing.append(key)
-    return missing
-
-
-def find_keys_still_template_copy(source_data: Dict[str, Any], target_data: Dict[str, Any]) -> List[str]:
-    """Keys whose value is still exactly the same as the template (typical after cp app_en.arb → app_xx.arb)."""
-    out: List[str] = []
-    for key in source_data:
-        if key == "@@locale" or key.startswith("@"):
-            continue
-        src = source_data.get(key)
-        if not is_translatable_entry(key, src):
-            continue
-        if not isinstance(src, str):
-            continue
-        tgt = target_data.get(key)
-        if not isinstance(tgt, str) or tgt.strip() == "":
-            out.append(key)
-        elif tgt == src:
-            out.append(key)
-    return out
-
-
-def get_all_locale_files(l10n_dir: str, template_file: str) -> List[Tuple[str, str]]:
-    """Find all locale .arb files excluding template. Returns [(locale_code, file_path)]."""
-    locales = []
-    template_basename = os.path.basename(template_file)
-
-    for filename in os.listdir(l10n_dir):
-        if filename.endswith('.arb') and filename != template_basename:
-            if filename.startswith('app_'):
-                locale = filename[4:-4]  # app_es.arb -> es
-                locales.append((locale, os.path.join(l10n_dir, filename)))
-
-    return sorted(locales)
+    return key, text, last_err, False  # fallback to original on failure
 
 
 def fmt_duration(seconds: float) -> str:
-    """Format duration as human-readable string."""
     if seconds < 60:
         return f"{seconds:.1f}s"
     m = int(seconds // 60)
@@ -312,53 +587,330 @@ def fmt_duration(seconds: float) -> str:
     return f"{h}h {m2}m"
 
 
+def find_missing_keys(source_data: Dict[str, Any], target_data: Dict[str, Any]) -> List[str]:
+    """Return the source keys, in source order, that target lacks or holds as a blank string; "@"-prefixed keys are skipped."""
+    missing = []
+    for key in source_data:
+        if key == "@@locale":
+            continue
+        if key.startswith("@"):
+            continue
+        if key not in target_data:
+            missing.append(key)
+        elif isinstance(target_data.get(key), str) and target_data[key].strip() == "":
+            # Whitespace-only strings count as missing; non-string values are treated as present
+            missing.append(key)
+    return missing
+
+
+def get_all_locale_files(l10n_dir: str, template_file: str) -> List[Tuple[str, str]]:
+    """List the app_<locale>.arb files directly inside l10n_dir, skipping the template's basename.
+
+    Returns sorted (locale_code, file_path) tuples; .arb files without the app_ prefix are ignored.
+    """
+    locales = []
+    template_basename = os.path.basename(template_file)
+    
+    for filename in os.listdir(l10n_dir):
+        if not filename.endswith('.arb'):
+            continue
+        if filename == template_basename:
+            continue
+        # The locale code is whatever sits between the prefix and suffix: app_es.arb -> es
+        if filename.startswith('app_') and filename.endswith('.arb'):
+            locale = filename[4:-4]  # Drop the 4-char 'app_' prefix and the 4-char '.arb' suffix
+            filepath = os.path.join(l10n_dir, filename)
+            locales.append((locale, filepath))
+    
+    return sorted(locales)
+
+
+def main() -> int:
+    """Parse CLI arguments and translate one locale, or every app_*.arb under --l10n-dir; return the process exit code."""
+    ap = argparse.ArgumentParser()
+    ap.add_argument("--in", dest="in_path", required=True, help="Input .arb/.json file path (source/template)")
+    ap.add_argument("--out", dest="out_path", default=None, help="Output .arb/.json file path (required unless using --l10n-dir)")
+    ap.add_argument("--to-locale", default=None, help="Target locale code, e.g. es, fr, de (required unless using --l10n-dir)")
+    ap.add_argument("--l10n-dir", default=None, help="Directory containing locale .arb files. When set, translates all locales.")
+    ap.add_argument("--missing-only", action="store_true", help="Only translate keys missing from target file")
+    ap.add_argument("--target-lang", default=None, help="Target language name for the model, e.g. Spanish (defaults from locale)")
+    ap.add_argument("--model", default="gemma3:4b", help="Model name for selected backend")
+    ap.add_argument("--backend", choices=["ollama", "groq", "openai"], default="ollama", help="Inference backend to use")
+    ap.add_argument("--groq-api-key", default=None, help="Groq API key (can also be set via GROQ_API_KEY env var)")
+    ap.add_argument("--openai-api-key", default=None,
+                    help="OpenAI API key (for local servers this can often be omitted)")
+    ap.add_argument("--openai-base-url", default="http://localhost:1234/v1",
+                    help="OpenAI-compatible base URL for local LLM server")
+    ap.add_argument("--fallback-model", default=None, help="Larger model to use for low-confidence translations")
+    ap.add_argument("--confidence-threshold", type=float, default=0.7, help="Computed confidence threshold to trigger fallback (0.0-1.0)")
+    ap.add_argument("--model-confidence-threshold", type=int, default=4, help="Model self-reported confidence threshold (1-5, use fallback if below)")
+    ap.add_argument("--retry-model", default=None, help="Model to use for end-of-run retries")
+    ap.add_argument("--host", default="http://localhost:11434", help="Ollama host")
+    ap.add_argument("--timeout", type=float, default=120.0, help="HTTP timeout seconds")
+    ap.add_argument("--temperature", type=float, default=0.2, help="Model temperature")
+    ap.add_argument("--num-ctx", type=int, default=4096, help="Context size")
+    ap.add_argument("--num-predict", type=int, default=256, help="Max tokens to generate")
+    ap.add_argument("--top-p", type=float, default=0.9, help="Top-p")
+    ap.add_argument("--concurrency", type=int, default=4, help="Parallel requests")
+    ap.add_argument("--retries", type=int, default=2, help="Retries per string")
+    ap.add_argument("--backoff", type=float, default=0.6, help="Backoff seconds base")
+    ap.add_argument("--dry-run", action="store_true", help="Do not write file; just print summary")
+    ap.add_argument("--progress-every", type=int, default=1, help="Print progress every N completed strings (default: 1)")
+    args = ap.parse_args()
+
+    locale_map = {
+        "es": "Spanish",
+        "fr": "French",
+        "de": "German",
+        "it": "Italian",
+        "pt": "Portuguese",
+        "pt-BR": "Brazilian Portuguese",
+        "ja": "Japanese",
+        "ko": "Korean",
+        "zh": "Chinese (Simplified)",
+        "zh-Hant": "Chinese (Traditional)",
+        "ru": "Russian",
+        "uk": "Ukrainian",
+        "ar": "Arabic",
+        "hi": "Hindi",
+        "tr": "Turkish",
+        "nl": "Dutch",
+        "sv": "Swedish",
+        "no": "Norwegian",
+        "da": "Danish",
+        "fi": "Finnish",
+        "pl": "Polish",
+        "cs": "Czech",
+        "sk": "Slovak",
+        "sl": "Slovenian",
+        "bg": "Bulgarian",
+        "el": "Greek",
+        "he": "Hebrew",
+        "th": "Thai",
+        "vi": "Vietnamese",
+        "id": "Indonesian",
+    }
+
+    # Read source/template file
+    try:
+        with open(args.in_path, "r", encoding="utf-8") as f:
+            source_data = json.load(f)
+    except Exception as e:
+        print(f"Failed to read input: {e}", file=sys.stderr)
+        return 2
+
+    if not isinstance(source_data, dict):
+        print("Input JSON must be an object at top-level.", file=sys.stderr)
+        return 2
+
+    # If --l10n-dir is provided, process all locale files
+    if args.l10n_dir:
+        locales = get_all_locale_files(args.l10n_dir, args.in_path)
+        if not locales:
+            print(f"No locale files found in {args.l10n_dir}", file=sys.stderr)
+            return 1
+        print(f"Found {len(locales)} locale file(s) to process")
+
+        total_translated = 0
+        for locale_code, locale_path in locales:
+            target_lang = locale_map.get(locale_code.replace("_", "-"), locale_code)  # app_pt_BR.arb yields "pt_BR"; map keys use "-"
+            # Read existing target file
+            try:
+                with open(locale_path, "r", encoding="utf-8") as f:
+                    target_data = json.load(f)
+            except Exception as e:
+                print(f"  [{locale_code}] Failed to read {locale_path}: {e}")
+                continue
+
+            if args.missing_only:
+                missing_keys = find_missing_keys(source_data, target_data)
+                if not missing_keys:
+                    print(f"  [{locale_code}] No missing keys")
+                    continue
+                print(f"  [{locale_code}] {len(missing_keys)} missing key(s): {', '.join(missing_keys[:5])}{'...' if len(missing_keys) > 5 else ''}")
+            else:
+                missing_keys = None
+
+            # Run translation for this locale
+            result = translate_locale(
+                source_data=source_data,
+                target_data=target_data,
+                target_locale=locale_code,
+                target_lang=target_lang,
+                out_path=locale_path,
+                args=args,
+                locale_map=locale_map,
+                missing_keys=missing_keys,
+            )
+            if result < 0:
+                # translate_locale returns a negative count when the backend cannot be set up; stop instead of repeating the error per locale and exiting 0.
+                return 1
+            total_translated += result
+
+        print(f"\nTotal: {total_translated} string(s) translated across {len(locales)} locale(s)")
+        return 0
+
+    # Single locale mode - validate required args
+    if not args.out_path:
+        print("--out is required when not using --l10n-dir", file=sys.stderr)
+        return 1
+    if not args.to_locale:
+        print("--to-locale is required when not using --l10n-dir", file=sys.stderr)
+        return 1
+
+    target_lang = args.target_lang or locale_map.get(args.to_locale.replace("_", "-"), args.to_locale)  # accept pt_BR as well as pt-BR
+
+    # Read existing target file if --missing-only and file exists
+    target_data: Dict[str, Any] = {}
+    missing_keys: Optional[List[str]] = None
+    if args.missing_only:
+        if os.path.exists(args.out_path):
+            try:
+                with open(args.out_path, "r", encoding="utf-8") as f:
+                    target_data = json.load(f)
+                missing_keys = find_missing_keys(source_data, target_data)
+                if not missing_keys:
+                    print(f"No missing keys in {args.out_path}")
+                    return 0
+                print(f"Found {len(missing_keys)} missing key(s) to translate")
+            except Exception as e:
+                print(f"Failed to read target file: {e}", file=sys.stderr)
+                return 2
+        else:
+            print(f"Target file {args.out_path} does not exist. Will translate all strings.")
+
+    result = translate_locale(
+        source_data=source_data,
+        target_data=target_data,
+        target_locale=args.to_locale,
+        target_lang=target_lang,
+        out_path=args.out_path,
+        args=args,
+        locale_map=locale_map,
+        missing_keys=missing_keys,
+    )
+    return 0 if result >= 0 else 1
+
+
 def translate_locale(
     source_data: Dict[str, Any],
     target_data: Dict[str, Any],
     target_locale: str,
     target_lang: str,
-    target_code: str,
     out_path: str,
     args,
+    locale_map: Dict[str, str],
     missing_keys: Optional[List[str]] = None,
 ) -> int:
     """Translate a single locale. Returns number of strings translated."""
 
-    cfg = OllamaConfig(
-        host=args.host,
-        model=args.model,
-        timeout_s=args.timeout,
-        temperature=args.temperature,
-    )
+    if args.backend == "groq":
+        if not GROQ_AVAILABLE:
+            print("Error: Groq backend requested but 'groq' package is not installed.", file=sys.stderr)
+            print("Run:  pip install groq", file=sys.stderr)
+            return -1
 
-    fallback_cfg = None
-    if args.fallback_model:
-        fallback_cfg = OllamaConfig(
+        api_key = args.groq_api_key or os.environ.get("GROQ_API_KEY")
+        if not api_key:
+            print("Error: --groq-api-key or GROQ_API_KEY environment variable is required", file=sys.stderr)
+            return -1  # negative = setup failure, like the missing-package branch; 1 would read as "1 string translated"
+
+        client = Groq(api_key=api_key)
+
+        cfg = GroqConfig(
+            client=client,
+            model=args.model,
+            temperature=args.temperature,
+            max_tokens=args.num_predict,       # reusing the same flag
+            top_p=args.top_p,
+        )
+        generate_fn = groq_generate
+
+        fallback_cfg = None
+        fallback_generate_fn = None
+        if args.fallback_model:
+            print("Warning: --fallback-model not yet supported with Groq backend", file=sys.stderr)
+
+    elif args.backend == "openai":
+        if not OPENAI_AVAILABLE:
+            print("Error: OpenAI backend requested but 'openai' package is not installed.", file=sys.stderr)
+            print("Run:  pip install openai", file=sys.stderr)
+            return -1
+
+        # Local OpenAI-compatible servers often accept any non-empty API key.
+        api_key = args.openai_api_key or os.environ.get("OPENAI_API_KEY") or "local"
+        client = OpenAI(api_key=api_key, base_url=args.openai_base_url)
+
+        cfg = OpenAIConfig(
+            client=client,
+            model=args.model,
+            temperature=args.temperature,
+            max_tokens=args.num_predict,
+            top_p=args.top_p,
+        )
+        generate_fn = openai_generate
+
+        fallback_cfg = None
+        fallback_generate_fn = None
+        if args.fallback_model:
+            fallback_cfg = OpenAIConfig(
+                client=client,
+                model=args.fallback_model,
+                temperature=args.temperature,
+                max_tokens=args.num_predict,
+                top_p=args.top_p,
+            )
+            fallback_generate_fn = openai_generate
+
+    else:  # ollama
+        cfg = OllamaConfig(
             host=args.host,
-            model=args.fallback_model,
+            model=args.model,
             timeout_s=args.timeout,
             temperature=args.temperature,
+            num_ctx=args.num_ctx,
+            num_predict=args.num_predict,
+            top_p=args.top_p,
         )
+        generate_fn = ollama_generate
 
-    # Start with target data or source data
-    out_data: Dict[str, Any] = dict(target_data) if target_data else dict(source_data)
+        fallback_cfg = None
+        fallback_generate_fn = None
+        if args.fallback_model:
+            fallback_cfg = OllamaConfig(
+                host=args.host,
+                model=args.fallback_model,
+                timeout_s=args.timeout,
+                temperature=args.temperature,
+                num_ctx=args.num_ctx,
+                num_predict=args.num_predict,
+                top_p=args.top_p,
+            )
+            fallback_generate_fn = ollama_generate
+
+    # Start with target data (preserves existing translations) or source data
+    if target_data:
+        out_data: Dict[str, Any] = dict(target_data)
+    else:
+        out_data: Dict[str, Any] = dict(source_data)
     out_data["@@locale"] = target_locale
 
     # Build list of items to translate
     if missing_keys is not None:
+        # Only translate missing keys
         items: List[Tuple[str, str]] = [
-            (k, source_data[k]) for k in missing_keys
+            (k, source_data[k]) for k in missing_keys 
             if is_translatable_entry(k, source_data.get(k))
         ]
-        # Copy metadata for missing items
+        # Also copy over any metadata keys for missing items
         for key in missing_keys:
             meta_key = f"@{key}"
             if meta_key in source_data:
                 out_data[meta_key] = source_data[meta_key]
     else:
         items: List[Tuple[str, str]] = [(k, v) for k, v in source_data.items() if is_translatable_entry(k, v)]
-
-    # Apply manual translations
+    
+    # Apply manual translations first
     manual_count = 0
     items_to_translate: List[Tuple[str, str]] = []
     for k, v in items:
@@ -367,73 +919,175 @@ def translate_locale(
             manual_count += 1
         else:
             items_to_translate.append((k, v))
-
+    
     if manual_count > 0:
         print(f"Applied {manual_count} manual translation(s)")
-
+    
     total = len(items_to_translate)
+    if total == 0 and manual_count == 0:
+        print("No translatable string entries found (excluding @@locale and @metadata).")
+        return 0
+    
     if total == 0:
-        if manual_count > 0:
-            print("All strings handled by manual translations.")
-        return manual_count
-
-    fallback_info = f" (fallback: {args.fallback_model})" if args.fallback_model else ""
-    print(f"Translating {total} strings -> {target_lang} using {cfg.model}{fallback_info} (concurrency={args.concurrency})")
-
+        print("All strings handled by manual translations.")
+    else:
+        fallback_info = f" (fallback: {args.fallback_model})" if args.fallback_model else ""
+        print(f"Translating {total} strings -> {target_lang} using {cfg.model}{fallback_info} (concurrency={args.concurrency})")
+    
     start = time.time()
+
     failures: List[Tuple[str, str]] = []
-    translated_ok = manual_count
+    translated_ok = manual_count  # Count manual translations as OK
     fallback_used = 0
     completed = 0
 
-    with ThreadPoolExecutor(max_workers=max(1, args.concurrency)) as ex:
-        future_to_key = {
+    # Build a lookup for original text by key
+    items_dict: Dict[str, str] = dict(items_to_translate)
+
+    # Submit all tasks up front
+    if total > 0:
+        with ThreadPoolExecutor(max_workers=max(1, args.concurrency)) as ex:
+            future_to_key = {
             ex.submit(
-                translate_one,
-                key=k,
-                text=v,
-                target_lang=target_lang,
-                target_code=target_code,
-                cfg=cfg,
-                retries=args.retries,
-                backoff_s=args.backoff,
-                fallback_cfg=fallback_cfg,
-            ): k
-            for (k, v) in items_to_translate
-        }
+                    translate_one,
+                    key=k,
+                    text=v,
+                    target_lang=target_lang,
+                    generate_fn=generate_fn,
+                    config=cfg,
+                    retries=args.retries,
+                    backoff_s=args.backoff,
+                    fallback_generate_fn=fallback_generate_fn,
+                    fallback_config=fallback_cfg,
+                    confidence_threshold=args.confidence_threshold,
+                    model_confidence_threshold=args.model_confidence_threshold,
+                    ask_model_confidence=bool(args.fallback_model),
+                ): k
+                for (k, v) in items_to_translate
+            }
+            print(f"Submitted {len(future_to_key)} translation tasks...")
+            for fut in as_completed(future_to_key):
+                k, translated, err, used_fallback = fut.result()
+                out_data[k] = translated
 
-        for fut in as_completed(future_to_key):
-            k, translated, err, used_fallback = fut.result()
-            out_data[k] = translated
-
-            completed += 1
-            if err:
-                failures.append((k, err))
-                status = "FAIL"
-            else:
-                translated_ok += 1
-                if used_fallback:
-                    fallback_used += 1
-                    status = "OK*"
+                completed += 1
+                if err:
+                    failures.append((k, err))
+                    status = "FAIL"
                 else:
-                    status = "OK"
+                    translated_ok += 1
+                    if used_fallback:
+                        fallback_used += 1
+                        status = "OK*"  # asterisk indicates fallback model was used
+                    else:
+                        status = "OK"
 
-            if completed % args.progress_every == 0 or completed == total:
-                elapsed = time.time() - start
-                rate = completed / elapsed if elapsed > 0 else 0.0
-                remaining = (total - completed) / rate if rate > 0 else 0.0
-                print(f"[{completed:>4}/{total}] {status:<4} {k} | elapsed {fmt_duration(elapsed)} | ETA {fmt_duration(remaining)}")
+                if args.progress_every > 0 and (completed % args.progress_every == 0 or completed == total):
+                    elapsed = time.time() - start
+                    rate = completed / elapsed if elapsed > 0 else 0.0
+                    remaining = (total - completed) / rate if rate > 0 else 0.0
+                    # Keep it single-line friendly but readable.
+                    print(
+                        f"[{completed:>4}/{total}] {status:<4} {k} | "
+                        f"elapsed {fmt_duration(elapsed)} | ETA {fmt_duration(remaining)}"
+                    )
 
     elapsed = time.time() - start
-    fallback_msg = f", fallback_used={fallback_used}" if fallback_used > 0 else ""
-    print(f"Done in {fmt_duration(elapsed)}. OK={translated_ok}{fallback_msg}, errors={len(failures)}")
+    fallback_msg = f", used_fallback_model={fallback_used}" if fallback_used > 0 else ""
+    print(f"Done in {fmt_duration(elapsed)}. OK={translated_ok}{fallback_msg}, errors={len(failures)}")  # counts only: the per-string loop variable is unbound when total == 0
+
+    # Retry failed translations at the end with increasing temperature
+    retry_round = 1
+    max_end_retries = 3
+    retry_model = args.retry_model or args.model
+
+    while failures and retry_round <= max_end_retries:
+        # Increase temperature for each retry round
+        retry_temp = min(cfg.temperature + (0.2 * retry_round), 1.0)
+        print(f"\n--- Retry round {retry_round}/{max_end_retries} for {len(failures)} failed key(s) (model={retry_model}, temp={retry_temp:.1f}) ---")
+        retry_items = [(k, items_dict[k]) for k, _ in failures]
+        failures = []
+        retry_completed = 0
+        retry_total = len(retry_items)
+        retry_start = time.time()
+        if args.backend == "groq":
+            retry_cfg = GroqConfig(
+                client=cfg.client,
+                model=retry_model,
+                temperature=retry_temp,
+                max_tokens=cfg.max_tokens,
+                top_p=cfg.top_p,
+            )
+            retry_generate_fn = groq_generate
+        elif args.backend == "openai":
+            retry_cfg = OpenAIConfig(
+                client=cfg.client,
+                model=retry_model,
+                temperature=retry_temp,
+                max_tokens=cfg.max_tokens,
+                top_p=cfg.top_p,
+            )
+            retry_generate_fn = openai_generate
+        else:
+            retry_cfg = OllamaConfig(
+                host=cfg.host,
+                model=retry_model,
+                timeout_s=cfg.timeout_s,
+                temperature=retry_temp,
+                num_ctx=cfg.num_ctx,
+                num_predict=cfg.num_predict,
+                top_p=cfg.top_p,
+            )
+            retry_generate_fn = ollama_generate 
+        with ThreadPoolExecutor(max_workers=max(1, args.concurrency)) as ex:
+            future_to_key = {
+                ex.submit(
+                    translate_one,
+                    key=k,
+                    text=v,
+                    target_lang=target_lang,
+                    config=retry_cfg,
+                    generate_fn=retry_generate_fn,
+                    retries=args.retries,
+                    backoff_s=args.backoff,
+                ): k
+                for (k, v) in retry_items
+            }
+
+            for fut in as_completed(future_to_key):
+                k, translated, err, used_fb = fut.result()
+                out_data[k] = translated
+
+                retry_completed += 1
+                if err:
+                    failures.append((k, err))
+                    status = "FAIL"
+                else:
+                    translated_ok += 1
+                    status = "OK"
+
+                if args.progress_every > 0 and (retry_completed % args.progress_every == 0 or retry_completed == retry_total):
+                    elapsed = time.time() - retry_start
+                    rate = retry_completed / elapsed if elapsed > 0 else 0.0
+                    remaining = (retry_total - retry_completed) / rate if rate > 0 else 0.0
+                    print(
+                        f"[{retry_completed:>4}/{retry_total}] {status:<4} {k} | "
+                        f"elapsed {fmt_duration(elapsed)} | ETA {fmt_duration(remaining)}"
+                    )
+
+        retry_elapsed = time.time() - retry_start
+        print(f"Retry round {retry_round} done in {fmt_duration(retry_elapsed)}. Remaining failures: {len(failures)}")
+        retry_round += 1
+
+    total_elapsed = time.time() - start
+    print(f"\nTotal time: {fmt_duration(total_elapsed)}. OK={translated_ok}, final fallback={len(failures)}")
 
     if failures:
-        print(f"{len(failures)} translation(s) kept original English:")
-        for k, err in failures[:20]:
+        print("Fallback keys (kept original English due to errors):")
+        for k, err in failures[:60]:
             print(f" - {k}: {err}")
-        if len(failures) > 20:
-            print(f" ... and {len(failures) - 20} more")
+        if len(failures) > 60:
+            print(f" ... and {len(failures) - 60} more")
 
     if args.dry_run:
         print("Dry run: not writing output file.")
@@ -451,151 +1105,5 @@ def translate_locale(
     return translated_ok
 
 
-def main() -> int:
-    ap = argparse.ArgumentParser(description="Translate ARB files using TranslateGemma")
-    ap.add_argument("--in", dest="in_path", required=True, help="Input .arb file (source/template)")
-    ap.add_argument("--out", dest="out_path", help="Output .arb file (required unless using --l10n-dir)")
-    ap.add_argument("--to-locale", help="Target locale code (es, fr, de, etc.)")
-    ap.add_argument("--l10n-dir", help="Directory with locale files (translates all locales)")
-    ap.add_argument("--missing-only", action="store_true", help="Only translate missing keys")
-    ap.add_argument(
-        "--copy-of-template",
-        action="store_true",
-        help="Only translate keys whose target text still equals app_en (use for new locales copied from English)",
-    )
-    ap.add_argument(
-        "--only-locales",
-        help="Comma-separated locale codes to process with --l10n-dir (e.g. hu,ja,ko)",
-    )
-    ap.add_argument("--model", default="translategemma:latest", help="Ollama model (translategemma:latest or specific versions)")
-    ap.add_argument("--fallback-model", help="Fallback model for failed translations (e.g., translategemma:27b)")
-    ap.add_argument("--host", default="http://localhost:11434", help="Ollama host")
-    ap.add_argument("--timeout", type=float, default=120.0, help="HTTP timeout seconds")
-    ap.add_argument("--temperature", type=float, default=0.0, help="Model temperature (0.0 for deterministic)")
-    ap.add_argument("--concurrency", type=int, default=4, help="Parallel requests")
-    ap.add_argument("--retries", type=int, default=2, help="Retries per string")
-    ap.add_argument("--backoff", type=float, default=0.6, help="Backoff seconds base")
-    ap.add_argument("--dry-run", action="store_true", help="Don't write output")
-    ap.add_argument("--progress-every", type=int, default=1, help="Print progress every N strings")
-    args = ap.parse_args()
-
-    # Read source file
-    try:
-        with open(args.in_path, "r", encoding="utf-8") as f:
-            source_data = json.load(f)
-    except Exception as e:
-        print(f"Failed to read input: {e}", file=sys.stderr)
-        return 2
-
-    if not isinstance(source_data, dict):
-        print("Input JSON must be an object at top-level.", file=sys.stderr)
-        return 2
-
-    if args.missing_only and args.copy_of_template:
-        print("Use only one of --missing-only or --copy-of-template", file=sys.stderr)
-        return 2
-
-    only_locales: Optional[set] = None
-    if args.only_locales:
-        only_locales = {x.strip() for x in args.only_locales.split(",") if x.strip()}
-
-    # Process all locales if --l10n-dir is provided
-    if args.l10n_dir:
-        locales = get_all_locale_files(args.l10n_dir, args.in_path)
-        if not locales:
-            print(f"No locale files found in {args.l10n_dir}", file=sys.stderr)
-            return 1
-
-        if only_locales is not None:
-            locales = [(c, p) for c, p in locales if c in only_locales]
-            missing = only_locales - {c for c, _ in locales}
-            if missing:
-                print(f"Warning: no app_*.arb for locale code(s): {', '.join(sorted(missing))}", file=sys.stderr)
-
-        print(f"Found {len(locales)} locale file(s) to process")
-
-        total_translated = 0
-        for locale_code, locale_path in locales:
-            lang_name, lang_code = LOCALE_MAP.get(locale_code, (locale_code, locale_code))
-
-            try:
-                with open(locale_path, "r", encoding="utf-8") as f:
-                    target_data = json.load(f)
-            except Exception as e:
-                print(f"  [{locale_code}] Failed to read {locale_path}: {e}")
-                continue
-
-            missing_keys: Optional[List[str]]
-            if args.copy_of_template:
-                missing_keys = find_keys_still_template_copy(source_data, target_data)
-                if not missing_keys:
-                    print(f"  [{locale_code}] No keys still matching template")
-                    continue
-                print(f"  [{locale_code}] {len(missing_keys)} key(s) still same as template")
-            elif args.missing_only:
-                missing_keys = find_missing_keys(source_data, target_data)
-                if not missing_keys:
-                    print(f"  [{locale_code}] No missing keys")
-                    continue
-                print(f"  [{locale_code}] {len(missing_keys)} missing key(s)")
-            else:
-                missing_keys = None
-
-            result = translate_locale(
-                source_data=source_data,
-                target_data=target_data,
-                target_locale=locale_code,
-                target_lang=lang_name,
-                target_code=lang_code,
-                out_path=locale_path,
-                args=args,
-                missing_keys=missing_keys,
-            )
-            total_translated += result
-
-        print(f"\nTotal: {total_translated} string(s) translated across {len(locales)} locale(s)")
-        return 0
-
-    # Single locale mode
-    if not args.out_path or not args.to_locale:
-        print("--out and --to-locale are required when not using --l10n-dir", file=sys.stderr)
-        return 1
-
-    lang_name, lang_code = LOCALE_MAP.get(args.to_locale, (args.to_locale, args.to_locale))
-
-    # Read existing target file if --missing-only or --copy-of-template
-    target_data: Dict[str, Any] = {}
-    missing_keys: Optional[List[str]] = None
-    if (args.missing_only or args.copy_of_template) and os.path.exists(args.out_path):
-        try:
-            with open(args.out_path, "r", encoding="utf-8") as f:
-                target_data = json.load(f)
-            if args.copy_of_template:
-                missing_keys = find_keys_still_template_copy(source_data, target_data)
-                label = "still matching template"
-            else:
-                missing_keys = find_missing_keys(source_data, target_data)
-                label = "missing"
-            if not missing_keys:
-                print(f"No {label} keys in {args.out_path}")
-                return 0
-            print(f"Found {len(missing_keys)} {label} key(s) to translate")
-        except Exception as e:
-            print(f"Failed to read target file: {e}", file=sys.stderr)
-            return 2
-
-    result = translate_locale(
-        source_data=source_data,
-        target_data=target_data,
-        target_locale=args.to_locale,
-        target_lang=lang_name,
-        target_code=lang_code,
-        out_path=args.out_path,
-        args=args,
-        missing_keys=missing_keys,
-    )
-    return 0 if result >= 0 else 1
-
-
 if __name__ == "__main__":
-    raise SystemExit(main())
+    raise SystemExit(main())
\ No newline at end of file