#!/usr/bin/env python3
"""
generate_alarm_translations.py

Post-campaign script: reads AlarmTranslationsChecked.de.json (the reviewed and
AI-synthesized German content), translates into English, French, and Italian,
and writes:

    Resources/AlarmTranslations.de.json   ← replace with reviewed German
    Resources/AlarmTranslations.en.json   ← back-translated from German
    Resources/AlarmTranslations.fr.json   ← translated from German
    Resources/AlarmTranslations.it.json   ← translated from German
    Services/AlarmKnowledgeBase.cs        ← updated English source (keeps same structure)

Run this AFTER the review campaign is complete:
    export MISTRAL_API_KEY=your_key_here
    cd csharp/App/Backend
    python3 generate_alarm_translations.py
"""

import json
import os
import re
import sys
import time
import shutil
from typing import Optional

# `requests` is the only third-party dependency. The import is guarded so the
# pure helpers below stay importable without it; main() and call_mistral()
# report the missing package with an actionable message instead of a traceback.
try:
    import requests
except ImportError:
    requests = None

# ── Config ─────────────────────────────────────────────────────────────────

CHECKED_FILE = "Resources/AlarmTranslationsChecked.de.json"
KNOWLEDGE_BASE = "Services/AlarmKnowledgeBase.cs"
RESOURCES_DIR = "Resources"

MISTRAL_URL = "https://api.mistral.ai/v1/chat/completions"
MISTRAL_MODEL = "mistral-large-latest"
BATCH_SIZE = 5            # alarms per API call
RETRY_DELAY = 5           # seconds between retries on rate-limit
MAX_RETRIES = 3
REQUEST_TIMEOUT = (10, 90)  # passed to requests as timeout=(connect, read)

TARGET_LANGUAGES = {
    "en": "English",
    "fr": "French",
    "it": "Italian",
}


# ── Mistral API ─────────────────────────────────────────────────────────────

def strip_code_fence(content: str) -> str:
    """
    Removes a surrounding markdown code fence (``` or ```json) from a reply.

    Text without a fence is returned unchanged apart from whitespace trimming.
    A fence with no newline after the opening marker is handled as well
    instead of raising.
    """
    text = content.strip()
    if text.startswith("```"):
        _, newline, rest = text.partition("\n")
        # Normal case: drop the whole opening line (it may carry a language tag).
        # No newline at all: only the three backticks can be dropped safely.
        text = rest if newline else text[3:]
    if text.endswith("```"):
        text = text[:-3]
    return text.strip()


def call_mistral(api_key: str, prompt: str) -> Optional[str]:
    """
    Sends a single-message chat completion request to the Mistral API.

    Makes up to MAX_RETRIES attempts. A 429 response waits
    RETRY_DELAY * attempt seconds before the next attempt; HTTP errors and
    malformed response bodies wait RETRY_DELAY seconds. No wait happens after
    the final attempt.

    Returns the reply text with any markdown code fence removed, or None when
    every attempt failed.

    Raises RuntimeError if the `requests` package is not installed.
    """
    if requests is None:
        raise RuntimeError("the 'requests' package is required (pip install requests)")

    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }
    body = {
        "model": MISTRAL_MODEL,
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": 1800,
        "temperature": 0.1,
    }

    for attempt in range(1, MAX_RETRIES + 1):
        is_last = attempt == MAX_RETRIES
        try:
            resp = requests.post(MISTRAL_URL, headers=headers, json=body, timeout=REQUEST_TIMEOUT)
            if resp.status_code == 429:
                delay = RETRY_DELAY * attempt
                if is_last:
                    print(f" Rate limited, giving up (attempt {attempt}/{MAX_RETRIES})")
                else:
                    print(f" Rate limited, waiting {delay}s (attempt {attempt}/{MAX_RETRIES})...")
                    time.sleep(delay)
                continue
            resp.raise_for_status()
            content = resp.json()["choices"][0]["message"]["content"]
            if not isinstance(content, str):
                raise TypeError("message content is not a string")
        except requests.RequestException as e:
            print(f" HTTP error: {e} (attempt {attempt}/{MAX_RETRIES})")
        except (ValueError, KeyError, IndexError, TypeError) as e:
            # Body was not JSON or did not have the choices/message/content shape.
            print(f" Malformed API response: {e!r} (attempt {attempt}/{MAX_RETRIES})")
        else:
            return strip_code_fence(content)
        if not is_last:
            time.sleep(RETRY_DELAY)
    return None


def translate_batch(api_key: str, batch: dict, target_language: str) -> Optional[dict]:
    """
    Translates a batch of German alarm entries into the target language.
    Input:  { "AlarmKey": { "Explanation": "...", "Causes": [...], "NextSteps": [...] } }
    Output: same structure in target language.

    Returns None when the API call failed, the reply was not valid JSON, or
    the reply was valid JSON but not an object.
    """
    prompt = f"""You are translating battery energy storage system alarm descriptions from German into {target_language}.
The source content has been reviewed by field engineers and is accurate.
Translate faithfully — keep the same number of bullet points, same meaning, plain language for homeowners.

Input JSON (German):
{json.dumps(batch, ensure_ascii=False, indent=2)}

Return ONLY a valid JSON object with the same alarm keys. Each value must have exactly:
{{
  "Explanation": "translated explanation (1 sentence)",
  "Causes": ["translated cause 1", ...],
  "NextSteps": ["translated step 1", ...]
}}

Reply with ONLY the JSON object, no markdown, no extra text."""

    raw = call_mistral(api_key, prompt)
    if raw is None:
        return None

    try:
        parsed = json.loads(raw)
    except json.JSONDecodeError as e:
        print(f" JSON parse error: {e}")
        print(f" Raw (first 300 chars): {raw[:300]}")
        return None

    if not isinstance(parsed, dict):
        # A list or bare string is valid JSON but cannot be indexed by alarm key.
        print(f" JSON parse error: expected an object, got {type(parsed).__name__}")
        return None
    return parsed


# ── AlarmKnowledgeBase.cs generation ────────────────────────────────────────

def parse_kb_key_sections(filepath: str) -> dict:
    """
    Reads AlarmKnowledgeBase.cs and returns {key: "Sinexcel"|"Growatt"}
    preserving the original section order.

    Only entries written as `["Key"] = new()` inside the two dictionary
    initializers are recognised. A key present in both sections ends up
    mapped to "Growatt". Returns an empty dict when neither initializer is
    found.
    """
    with open(filepath, "r", encoding="utf-8") as f:
        content = f.read()

    # Any key without quotes/backslashes is accepted, so keys containing
    # spaces or punctuation are not silently dropped on regeneration.
    key_pattern = re.compile(r'\["([^"\\]+)"\]\s*=\s*new\(\)')

    result = {}
    for section in ("Sinexcel", "Growatt"):
        # Captures everything between the initializer's opening brace and the
        # first line consisting only of `};`.
        match = re.search(
            section + r'Alarms\s*=\s*new Dictionary.*?\{(.*?)^\s*\};',
            content,
            re.DOTALL | re.MULTILINE,
        )
        if match:
            for key in key_pattern.findall(match.group(1)):
                result[key] = section
    return result


# Characters that cannot appear raw inside a regular C# string literal.
_CS_ESCAPES = str.maketrans({
    "\\": "\\\\",
    '"': '\\"',
    "\0": "\\0",
    "\n": "\\n",
    "\r": "\\r",
    "\t": "\\t",
    "\u0085": "\\u0085",
    "\u2028": "\\u2028",
    "\u2029": "\\u2029",
})


def cs_escape(s: str) -> str:
    """
    Escapes a string for use inside a C# double-quoted string literal.

    Backslashes and double quotes are escaped, and so are NUL, tab and the
    line-terminator characters, which would otherwise end the literal early.
    """
    return s.translate(_CS_ESCAPES)


_KB_HEADER = (
    "namespace InnovEnergy.App.Backend.Services;\n"
    "\n"
    "/// <summary>\n"
    "/// Static knowledge base for Sinexcel and Growatt alarms.\n"
    "/// Provides pre-defined diagnostics without requiring Mistral API calls.\n"
    "/// Updated by generate_alarm_translations.py after the review campaign.\n"
    "/// </summary>\n"
    "public static class AlarmKnowledgeBase\n"
    "{\n"
    "    public static DiagnosticResponse? TryGetDiagnosis(string alarmDescription)\n"
    "    {\n"
    "        if (string.IsNullOrWhiteSpace(alarmDescription)) return null;\n"
    "        var normalized = alarmDescription.Trim();\n"
    "        if (SinexcelAlarms.TryGetValue(normalized, out var s)) return s;\n"
    "        if (GrowattAlarms.TryGetValue(normalized, out var g)) return g;\n"
    "        var lower = normalized.ToLowerInvariant();\n"
    "        foreach (var kvp in SinexcelAlarms) if (kvp.Key.ToLowerInvariant() == lower) return kvp.Value;\n"
    "        foreach (var kvp in GrowattAlarms) if (kvp.Key.ToLowerInvariant() == lower) return kvp.Value;\n"
    "        return null;\n"
    "    }\n"
    "\n"
)

_KB_SECTION_RULES = {
    "Sinexcel": "    // ── Sinexcel Alarms ──────────────────────────────────────────────────────\n",
    "Growatt": "    // ── Growatt Alarms ───────────────────────────────────────────────────────\n",
}


def _cs_string_array(items) -> str:
    """Renders a list of strings as a C# array-creation expression."""
    literals = [f'"{cs_escape(str(item))}"' for item in items or []]
    if not literals:
        # `new[] { }` has no element to infer a type from and does not compile.
        return "new string[] { }"
    return "new[] { " + ",\n                ".join(literals) + " }"


def write_knowledge_base_cs(filepath: str, en_translations: dict, key_sections: dict):
    """
    Writes an updated AlarmKnowledgeBase.cs using the new English translations,
    preserving the original Sinexcel/Growatt section structure.

    filepath:        destination .cs file (replaced as a whole).
    en_translations: {key: {"Explanation": str, "Causes": [...], "NextSteps": [...]}}
    key_sections:    {key: "Sinexcel"|"Growatt"} as returned by parse_kb_key_sections.

    Keys without a translation are emitted as a `// [key]` comment line
    instead of a dictionary entry. The content is written to a temporary
    sibling file and moved into place, so an interrupted run cannot leave a
    half-written source file behind.
    """
    sinexcel_keys = [k for k, s in key_sections.items() if s == "Sinexcel"]
    growatt_keys = [k for k, s in key_sections.items() if s == "Growatt"]

    def entry_block(key: str) -> str:
        entry = en_translations.get(key)
        if not entry or not isinstance(entry, dict):
            return f'        // [{key}] — no translation available\n'
        exp = cs_escape(str(entry.get("Explanation") or ""))
        causes = _cs_string_array(entry.get("Causes"))
        steps = _cs_string_array(entry.get("NextSteps"))
        return (
            f'        ["{key}"] = new()\n'
            f'        {{\n'
            f'            Explanation = "{exp}",\n'
            f'            Causes = {causes},\n'
            f'            NextSteps = {steps}\n'
            f'        }},\n'
        )

    def section_block(section: str, keys: list) -> str:
        # The generic arguments are required: the dictionaries are looked up by
        # string key and their values are returned as DiagnosticResponse.
        return (
            _KB_SECTION_RULES[section]
            + "\n"
            + f"    private static readonly IReadOnlyDictionary<string, DiagnosticResponse> {section}Alarms"
            + " = new Dictionary<string, DiagnosticResponse>\n"
            + "    {\n"
            + "".join(entry_block(key) for key in keys)
            + "    };\n"
        )

    text = (
        _KB_HEADER
        + section_block("Sinexcel", sinexcel_keys)
        + "\n"
        + section_block("Growatt", growatt_keys)
        + "}\n"
    )

    tmp_path = filepath + ".tmp"
    with open(tmp_path, "w", encoding="utf-8") as f:
        f.write(text)
    os.replace(tmp_path, filepath)

    print(f" ✓ Wrote updated AlarmKnowledgeBase.cs ({len(sinexcel_keys)} Sinexcel + {len(growatt_keys)} Growatt keys)")


# ── Main ────────────────────────────────────────────────────────────────────

def load_env_file(env_path: str) -> dict:
    """
    Parses a simple KEY=VALUE .env file into a dict.

    Blank lines, `#` comment lines and lines without `=` are ignored. A
    leading `export ` is dropped and one pair of matching single or double
    quotes around the value is removed. Returns an empty dict when the file
    does not exist.
    """
    env = {}
    try:
        with open(env_path, encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line or line.startswith("#") or "=" not in line:
                    continue
                if line.startswith("export "):
                    line = line[len("export "):].lstrip()
                k, _, v = line.partition("=")
                k, v = k.strip(), v.strip()
                if len(v) >= 2 and v[0] == v[-1] and v[0] in "\"'":
                    v = v[1:-1]
                if k:
                    env[k] = v
    except FileNotFoundError:
        pass
    return env


def _as_text_list(value) -> list:
    """Coerces a model-supplied field into a list of strings."""
    if isinstance(value, str):
        # The model occasionally returns a single string instead of a list.
        return [value] if value.strip() else []
    if isinstance(value, list):
        return [str(item) for item in value if item is not None]
    return []


def _normalize_entry(entry) -> Optional[dict]:
    """Returns a well-formed translation entry, or None if `entry` is not an object."""
    if not isinstance(entry, dict):
        return None
    explanation = entry.get("Explanation")
    return {
        "Explanation": explanation if isinstance(explanation, str) else str(explanation or ""),
        "Causes": _as_text_list(entry.get("Causes")),
        "NextSteps": _as_text_list(entry.get("NextSteps")),
    }


def main():
    """
    Runs the full pipeline: copy the reviewed German file, translate it into
    every language in TARGET_LANGUAGES, then regenerate AlarmKnowledgeBase.cs
    from the English result.

    All paths are relative to the current working directory; only the .env
    fallback for MISTRAL_API_KEY is looked up next to this script. Exits with
    status 1 on a missing dependency, API key or source file.
    """
    if requests is None:
        print("ERROR: the 'requests' package is required (pip install requests).")
        sys.exit(1)

    api_key = os.environ.get("MISTRAL_API_KEY", "").strip()
    if not api_key:
        script_dir = os.path.dirname(os.path.abspath(__file__))
        api_key = load_env_file(os.path.join(script_dir, ".env")).get("MISTRAL_API_KEY", "").strip()

    if not api_key:
        print("ERROR: MISTRAL_API_KEY not found in environment or .env file.")
        sys.exit(1)

    print("MISTRAL_API_KEY loaded.")

    # Load reviewed German source
    if not os.path.exists(CHECKED_FILE):
        print(f"ERROR: {CHECKED_FILE} not found. Run the review campaign first.")
        sys.exit(1)

    with open(CHECKED_FILE, "r", encoding="utf-8") as f:
        german_source = json.load(f)

    if not isinstance(german_source, dict) or not german_source:
        print(f"ERROR: {CHECKED_FILE} must contain a non-empty JSON object keyed by alarm.")
        sys.exit(1)

    alarm_keys = list(german_source.keys())
    print(f"Loaded {len(alarm_keys)} alarms from {CHECKED_FILE}.")

    # Step 1: copy reviewed German as the new de.json
    de_out = os.path.join(RESOURCES_DIR, "AlarmTranslations.de.json")
    shutil.copy(CHECKED_FILE, de_out)
    print(f"\n✓ Copied reviewed German → {de_out}")

    # Step 2: translate to en, fr, it
    batches = [
        {k: german_source[k] for k in alarm_keys[i:i + BATCH_SIZE]}
        for i in range(0, len(alarm_keys), BATCH_SIZE)
    ]
    all_translations = {}  # lang_code → {key → entry}
    total_failed = 0

    for lang_code, lang_name in TARGET_LANGUAGES.items():
        print(f"\n── Translating to {lang_name} ({lang_code}) ──")

        translations = {}
        failed_keys = []

        for batch_num, batch in enumerate(batches, 1):
            keys_in_batch = list(batch.keys())
            print(f" Batch {batch_num}/{len(batches)}: {', '.join(keys_in_batch)}")

            result = translate_batch(api_key, batch, lang_name)

            if result is None:
                print(f" FAILED batch {batch_num} — marking keys as failed")
                failed_keys.extend(keys_in_batch)
            else:
                for key in keys_in_batch:
                    entry = _normalize_entry(result.get(key))
                    if entry is None:
                        print(f" WARNING: key '{key}' missing or malformed in batch result")
                        failed_keys.append(key)
                    else:
                        translations[key] = entry

            # Small pause between calls to stay clear of the rate limit.
            if batch_num < len(batches):
                time.sleep(1)

        total_failed += len(failed_keys)
        if failed_keys:
            print(f" ⚠ Failed keys ({len(failed_keys)}): {failed_keys}")

        out_file = os.path.join(RESOURCES_DIR, f"AlarmTranslations.{lang_code}.json")
        if not translations:
            # Never replace an existing translation file with an empty object.
            print(f" ✗ Nothing translated — left {out_file} untouched")
            continue

        all_translations[lang_code] = translations
        with open(out_file, "w", encoding="utf-8") as f:
            json.dump(translations, f, ensure_ascii=False, indent=2)
        print(f" ✓ Wrote {len(translations)} entries → {out_file}")

    # Step 3: update AlarmKnowledgeBase.cs with the new English back-translation
    print("\n── Updating AlarmKnowledgeBase.cs ──")
    if "en" in all_translations and os.path.exists(KNOWLEDGE_BASE):
        key_sections = parse_kb_key_sections(KNOWLEDGE_BASE)
        if key_sections:
            write_knowledge_base_cs(KNOWLEDGE_BASE, all_translations["en"], key_sections)
        else:
            # Rewriting now would replace the file with two empty dictionaries.
            print(" Skipped — no alarm keys could be parsed from AlarmKnowledgeBase.cs.")
    else:
        print(" Skipped — en.json not generated or AlarmKnowledgeBase.cs not found.")

    if total_failed:
        print(f"\n⚠ {total_failed} translation(s) failed — see the warnings above.")
    print("\n✓ Done. Review the output files before deploying.")
    print(" Next: cd csharp/App/Backend && dotnet build && ./deploy.sh")


if __name__ == "__main__":
    main()