#!/usr/bin/env python3
"""
generate_alarm_translations.py

One-time script: reads AlarmKnowledgeBase.cs, calls Mistral API to translate
all alarm entries into German (de), French (fr), and Italian (it), and writes:

  Resources/AlarmTranslations.de.json   ← backend uses these at startup
  Resources/AlarmTranslations.fr.json
  Resources/AlarmTranslations.it.json
  Resources/AlarmNames.de.json          ← frontend lang file additions
  Resources/AlarmNames.fr.json
  Resources/AlarmNames.it.json

Usage:
  export MISTRAL_API_KEY=your_key_here
  python3 generate_alarm_translations.py

Output files can be reviewed/edited before committing.
"""

import re
import json
import os
import sys
import time
from typing import Optional

try:
    import requests
except ImportError:
    # Keep the pure parsing helpers importable without the HTTP dependency;
    # main() and call_mistral() report the missing package explicitly.
    requests = None

# ── Config ─────────────────────────────────────────────────────────────────

KNOWLEDGE_BASE_FILE = "Services/AlarmKnowledgeBase.cs"
RESOURCES_DIR = "Resources"
MISTRAL_URL = "https://api.mistral.ai/v1/chat/completions"
MISTRAL_MODEL = "mistral-small-latest"
BATCH_SIZE = 3       # alarms per API call — smaller = less chance of token truncation
RETRY_DELAY = 5      # seconds between retries on rate-limit
MAX_RETRIES = 3
REQUEST_TIMEOUT = (10, 90)  # (connect_timeout, read_timeout) in seconds

LANGUAGES = {
    "de": "German",
    "fr": "French",
    "it": "Italian",
}

# ── Parsing ─────────────────────────────────────────────────────────────────

# Patterns used by parse_knowledge_base(), compiled once at import time.
_KEY_RE = re.compile(r'\["(\w+)"\]\s*=\s*new\(\)')
_EXPLANATION_RE = re.compile(r'Explanation\s*=\s*"((?:[^"\\]|\\.)*)"')
_CAUSES_RE = re.compile(r'Causes\s*=\s*new\[\]\s*\{([^}]+)\}', re.DOTALL)
_NEXT_STEPS_RE = re.compile(r'NextSteps\s*=\s*new\[\]\s*\{([^}]+)\}', re.DOTALL)
_STRING_LITERAL_RE = re.compile(r'"((?:[^"\\]|\\.)*)"')

# Backslash escapes that may appear inside a regular C# string literal.
_CSHARP_ESCAPE_RE = re.compile(r'\\(u[0-9a-fA-F]{4}|.)', re.DOTALL)
_CSHARP_SIMPLE_ESCAPES = {
    '"': '"',
    "'": "'",
    "\\": "\\",
    "n": "\n",
    "r": "\r",
    "t": "\t",
    "0": "\0",
}


def split_camel_case(name: str) -> str:
    """'AbnormalGridVoltage' → 'Abnormal Grid Voltage'"""
    return re.sub(r'(?<=[a-z])(?=[A-Z])|(?<=[A-Z])(?=[A-Z][a-z])', ' ', name).strip()


def _unescape_csharp(text: str) -> str:
    """Turn the body of a C# string literal into the string it denotes.

    Handles the simple escapes in _CSHARP_SIMPLE_ESCAPES and \\uXXXX;
    any other escape sequence is left exactly as written.
    """
    def _replace(match):
        token = match.group(1)
        if len(token) == 5:  # 'u' followed by four hex digits
            return chr(int(token[1:], 16))
        return _CSHARP_SIMPLE_ESCAPES.get(token, match.group(0))

    return _CSHARP_ESCAPE_RE.sub(_replace, text)


def _string_literals(section) -> list:
    """Return the unescaped string literals inside a matched array section."""
    if section is None:
        return []
    return [_unescape_csharp(s) for s in _STRING_LITERAL_RE.findall(section.group(1))]


def parse_knowledge_base(filepath: str) -> dict:
    """
    Parses AlarmKnowledgeBase.cs and returns a dict:
    {
      "AlarmKey": {
        "Explanation": "...",
        "Causes": [...],
        "NextSteps": [...]
      }
    }

    Entries for which none of the three fields could be extracted are
    omitted. String values are unescaped, so a literal written as
    \\"quoted\\" in the C# source comes back with plain quotes.
    """
    with open(filepath, "r", encoding="utf-8") as f:
        content = f.read()

    alarms = {}

    # Find positions of all alarm key declarations: ["Key"] = new()
    key_matches = list(_KEY_RE.finditer(content))

    for i, key_match in enumerate(key_matches):
        key = key_match.group(1)
        # An entry's text runs from its own key up to the next key (or EOF).
        start = key_match.start()
        end = key_matches[i + 1].start() if i + 1 < len(key_matches) else len(content)
        block = content[start:end]

        # Explanation (single string)
        exp_match = _EXPLANATION_RE.search(block)
        explanation = _unescape_csharp(exp_match.group(1)) if exp_match else ""

        # Causes / NextSteps (string arrays)
        causes = _string_literals(_CAUSES_RE.search(block))
        next_steps = _string_literals(_NEXT_STEPS_RE.search(block))

        if explanation or causes or next_steps:
            alarms[key] = {
                "Explanation": explanation,
                "Causes": causes,
                "NextSteps": next_steps,
            }

    return alarms


# ── Mistral API ─────────────────────────────────────────────────────────────

def strip_code_fences(content: str) -> str:
    """Remove a surrounding markdown code fence from a model reply.

    A leading fence line (``` optionally followed by a language tag) and a
    trailing ``` are dropped. A reply without fences is returned stripped
    of surrounding whitespace. A fenced reply that contains no newline no
    longer raises: only the backticks themselves are removed.
    """
    content = content.strip()
    if content.startswith("```"):
        _first_line, newline, rest = content.partition("\n")
        content = rest if newline else content[3:]
    if content.endswith("```"):
        content = content[:-3]
    return content.strip()


def call_mistral(api_key: str, prompt: str) -> Optional[str]:
    """Send one user prompt to the Mistral chat completions endpoint.

    Returns the reply text with any markdown code fence removed, or None
    once MAX_RETRIES attempts have failed. Rate limiting (HTTP 429), HTTP
    and connection errors, and replies that lack the expected
    choices[0].message.content string are all treated as failed attempts.

    Raises:
        RuntimeError: if the 'requests' package is not installed.
    """
    if requests is None:
        raise RuntimeError("The 'requests' package is required (pip install requests).")

    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }
    body = {
        "model": MISTRAL_MODEL,
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": 1400,   # ~3 alarms × ~450 tokens each (German is verbose)
        "temperature": 0.1,   # low for consistent translations
    }

    for attempt in range(1, MAX_RETRIES + 1):
        last_attempt = attempt == MAX_RETRIES
        try:
            resp = requests.post(MISTRAL_URL, headers=headers, json=body, timeout=REQUEST_TIMEOUT)
            if resp.status_code == 429:
                if last_attempt:
                    print(f" Rate limited, giving up (attempt {attempt}/{MAX_RETRIES}).")
                    break
                # Linear back-off; report the delay that is actually applied.
                wait = RETRY_DELAY * attempt
                print(f" Rate limited, waiting {wait}s (attempt {attempt}/{MAX_RETRIES})...")
                time.sleep(wait)
                continue
            resp.raise_for_status()
            data = resp.json()
            content = data["choices"][0]["message"]["content"]
            if not isinstance(content, str):
                raise TypeError("message content is not a string")
        except requests.RequestException as e:
            print(f" HTTP error: {e} (attempt {attempt}/{MAX_RETRIES})")
        except (KeyError, IndexError, TypeError, ValueError) as e:
            print(f" Unexpected response format: {e!r} (attempt {attempt}/{MAX_RETRIES})")
        else:
            return strip_code_fences(content)

        # No point sleeping when there is no further attempt to wait for.
        if not last_attempt:
            time.sleep(RETRY_DELAY)

    return None


def translate_batch(api_key: str, batch: dict, language_name: str) -> Optional[dict]:
    """
    Translates a batch of alarms into the target language.
    Returns dict with same keys + translated content including a localized Name.

    Returns None when the API call fails, the reply is not valid JSON, or
    the reply parses to something other than a JSON object.
    """
    # Build input JSON: the English content plus a readable name derived
    # from the alarm key.
    input_data = {}
    for key, entry in batch.items():
        english_name = split_camel_case(key)
        input_data[key] = {
            "EnglishName": english_name,
            "Explanation": entry["Explanation"],
            "Causes": entry["Causes"],
            "NextSteps": entry["NextSteps"],
        }

    prompt = f"""You are translating battery energy storage system alarm descriptions into {language_name}.
Translate each alarm entry. The "Name" should be a short (2-5 word) localized display title for the alarm.
Keep technical terms accurate but use plain language a homeowner would understand.

Input JSON:
{json.dumps(input_data, ensure_ascii=False, indent=2)}

Return ONLY a valid JSON object with the same alarm keys. Each value must have exactly these fields:
{{
  "Name": "short {language_name} title",
  "Explanation": "translated explanation sentence",
  "Causes": ["translated cause 1", "translated cause 2"],
  "NextSteps": ["translated step 1", "translated step 2"]
}}

Reply with ONLY the JSON object, no markdown, no extra text."""

    raw = call_mistral(api_key, prompt)
    if raw is None:
        return None

    try:
        result = json.loads(raw)
    except json.JSONDecodeError as e:
        print(f" JSON parse error: {e}")
        print(f" Raw response (first 300 chars): {raw[:300]}")
        return None

    # Callers index the result by alarm key, so anything but an object is unusable.
    if not isinstance(result, dict):
        print(f" Unexpected JSON type: expected an object, got {type(result).__name__}")
        return None
    return result


# ── Main ────────────────────────────────────────────────────────────────────

def load_env_file(env_path: str) -> dict:
    """Parse a simple KEY=VALUE .env file.

    Blank lines, lines starting with '#', and lines without '=' are
    skipped. A leading 'export ' on the key and one pair of matching
    surrounding quotes on the value are removed. A missing file yields an
    empty dict.
    """
    env = {}
    try:
        with open(env_path, encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line or line.startswith("#") or "=" not in line:
                    continue
                k, _, v = line.partition("=")
                k = k.strip()
                if k.startswith("export "):
                    k = k[len("export "):].strip()
                v = v.strip()
                # KEY="value" / KEY='value': the quotes are not part of the value.
                if len(v) >= 2 and v[0] == v[-1] and v[0] in ('"', "'"):
                    v = v[1:-1]
                env[k] = v
    except FileNotFoundError:
        pass
    return env


def _write_json(path: str, data: dict) -> None:
    """Write data to path as UTF-8 JSON, indented, keeping non-ASCII characters."""
    with open(path, "w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, indent=2)


def _translate_language(api_key: str, alarms: dict, lang_name: str):
    """Translate every alarm into one language, batch by batch.

    Returns (translations, alarm_name_keys, failed_keys):
      translations     key → {Explanation, Causes, NextSteps}
      alarm_name_keys  "alarm_Key" → translated name (for lang JSON files)
      failed_keys      keys whose batch failed or whose entry was missing/malformed
    """
    translations = {}
    alarm_name_keys = {}
    failed_keys = []

    alarm_keys = list(alarms)
    batches = [
        {k: alarms[k] for k in alarm_keys[i:i + BATCH_SIZE]}
        for i in range(0, len(alarm_keys), BATCH_SIZE)
    ]

    for batch_num, batch in enumerate(batches, 1):
        keys_in_batch = list(batch)
        print(f" Batch {batch_num}/{len(batches)}: {', '.join(keys_in_batch)}")

        result = translate_batch(api_key, batch, lang_name)
        if result is None:
            print(f" FAILED batch {batch_num} — will mark keys as failed")
            failed_keys.extend(keys_in_batch)
            continue

        for key in keys_in_batch:
            if key not in result:
                print(f" WARNING: key '{key}' missing from batch result")
                failed_keys.append(key)
                continue
            entry = result[key]
            if not isinstance(entry, dict):
                print(f" WARNING: key '{key}' has a malformed entry in batch result")
                failed_keys.append(key)
                continue
            translations[key] = {
                "Explanation": entry.get("Explanation", ""),
                "Causes": entry.get("Causes", []),
                "NextSteps": entry.get("NextSteps", []),
            }
            # Fall back to the English name when Name is absent, empty or null.
            alarm_name_keys[f"alarm_{key}"] = entry.get("Name") or split_camel_case(key)

        # Small pause between batches to avoid rate limits
        if batch_num < len(batches):
            time.sleep(1)

    return translations, alarm_name_keys, failed_keys


def main():
    """Translate the alarm knowledge base and write per-language JSON files."""
    if requests is None:
        print("ERROR: the 'requests' package is not installed (pip install requests).")
        sys.exit(1)

    # Try environment variable first, then .env file in the same directory
    api_key = os.environ.get("MISTRAL_API_KEY", "").strip()
    if not api_key:
        script_dir = os.path.dirname(os.path.abspath(__file__))
        env_vars = load_env_file(os.path.join(script_dir, ".env"))
        api_key = env_vars.get("MISTRAL_API_KEY", "").strip()

    if not api_key:
        print("ERROR: MISTRAL_API_KEY not found in environment or .env file.")
        sys.exit(1)

    print("MISTRAL_API_KEY loaded.")

    # Parse knowledge base
    print(f"Parsing {KNOWLEDGE_BASE_FILE}...")
    alarms = parse_knowledge_base(KNOWLEDGE_BASE_FILE)
    print(f" Found {len(alarms)} alarm entries.")

    if not alarms:
        print("ERROR: No alarms parsed. Check the file path and format.")
        sys.exit(1)

    os.makedirs(RESOURCES_DIR, exist_ok=True)

    total_failed = 0

    # Process each language
    for lang_code, lang_name in LANGUAGES.items():
        print(f"\n── Translating to {lang_name} ({lang_code}) ──")

        translations, alarm_name_keys, failed_keys = _translate_language(
            api_key, alarms, lang_name
        )

        # Write backend translation file
        backend_file = os.path.join(RESOURCES_DIR, f"AlarmTranslations.{lang_code}.json")
        _write_json(backend_file, translations)
        print(f" Wrote {len(translations)} entries → {backend_file}")

        # Write frontend alarm name file (to be merged into lang JSON)
        names_file = os.path.join(RESOURCES_DIR, f"AlarmNames.{lang_code}.json")
        _write_json(names_file, alarm_name_keys)
        print(f" Wrote {len(alarm_name_keys)} name keys → {names_file}")

        if failed_keys:
            print(f" FAILED keys ({len(failed_keys)}): {failed_keys}")
            total_failed += len(failed_keys)

    if total_failed:
        print(f"\nWARNING: {total_failed} key(s) could not be translated; the output files are incomplete.")
    print("\n✓ Done. Review the output files in Resources/ before committing.")
    print(" Next: merge AlarmNames.*.json entries into src/lang/de.json, fr.json, it.json")


if __name__ == "__main__":
    main()