321 lines
13 KiB
Python
321 lines
13 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
generate_alarm_translations.py
|
|
|
|
Post-campaign script: reads AlarmTranslationsChecked.de.json (the reviewed and
|
|
AI-synthesized German content), translates into English, French, and Italian,
|
|
and writes:
|
|
|
|
Resources/AlarmTranslations.de.json ← replace with reviewed German
|
|
Resources/AlarmTranslations.en.json ← back-translated from German
|
|
Resources/AlarmTranslations.fr.json ← translated from German
|
|
Resources/AlarmTranslations.it.json ← translated from German
|
|
Services/AlarmKnowledgeBase.cs ← updated English source (keeps same structure)
|
|
|
|
Run this AFTER the review campaign is complete:
|
|
export MISTRAL_API_KEY=your_key_here
|
|
cd csharp/App/Backend
|
|
python3 generate_alarm_translations.py
|
|
"""
|
|
|
|
import json
|
|
import os
|
|
import re
|
|
import sys
|
|
import time
|
|
import shutil
|
|
from typing import Optional
|
|
import requests
|
|
|
|
# ── Config ─────────────────────────────────────────────────────────────────
|
|
|
|
# All relative paths are resolved against the current working directory.
RESOURCES_DIR = "Resources"
# Reviewed German source produced by the review campaign (input of this script).
CHECKED_FILE = f"{RESOURCES_DIR}/AlarmTranslationsChecked.de.json"
# C# knowledge base that gets regenerated from the English translation.
KNOWLEDGE_BASE = "Services/AlarmKnowledgeBase.cs"

MISTRAL_URL = "https://api.mistral.ai/v1/chat/completions"
MISTRAL_MODEL = "mistral-large-latest"

BATCH_SIZE = 5    # alarms per API call
RETRY_DELAY = 5   # base delay in seconds between retries
MAX_RETRIES = 3   # attempts per API call before giving up
# Passed as ``timeout=`` to requests.post: (connect, read) timeouts in seconds.
REQUEST_TIMEOUT = (10, 90)

# Language code (used in output file names) → language name (used in the prompt).
TARGET_LANGUAGES = dict(
    en="English",
    fr="French",
    it="Italian",
)
|
|
|
|
|
|
# ── Mistral API ─────────────────────────────────────────────────────────────
|
|
|
|
def _strip_code_fence(content: str) -> str:
    """Remove a surrounding Markdown code fence (``` or ```json) from a reply."""
    content = content.strip()
    if content.startswith("```"):
        # Drop the whole opening fence line; if the reply is a single line
        # there is no newline to split on, so only drop the three backticks.
        _, newline, remainder = content.partition("\n")
        content = remainder if newline else content[3:]
        if content.endswith("```"):
            content = content[:-3]
    return content.strip()


def call_mistral(api_key: str, prompt: str) -> Optional[str]:
    """Send ``prompt`` to the Mistral chat-completions endpoint.

    The request is attempted up to MAX_RETRIES times. HTTP 429 responses,
    transport/HTTP errors and malformed response payloads all count as a
    failed attempt and are retried.

    Args:
        api_key: Mistral API key, sent as a Bearer token.
        prompt: Full user prompt for a single-message conversation.

    Returns:
        The reply text with any surrounding Markdown code fence removed, or
        None when every attempt failed.
    """
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }
    body = {
        "model": MISTRAL_MODEL,
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": 1800,
        "temperature": 0.1,
    }

    for attempt in range(1, MAX_RETRIES + 1):
        # Sleeping after the final attempt would only delay the failure.
        retries_left = attempt < MAX_RETRIES
        try:
            resp = requests.post(MISTRAL_URL, headers=headers, json=body, timeout=REQUEST_TIMEOUT)
            if resp.status_code == 429:
                # Linear back-off; the message reports the delay actually used.
                delay = RETRY_DELAY * attempt
                if retries_left:
                    print(f" Rate limited, waiting {delay}s (attempt {attempt}/{MAX_RETRIES})...")
                    time.sleep(delay)
                else:
                    print(f" Rate limited, giving up (attempt {attempt}/{MAX_RETRIES}).")
                continue
            resp.raise_for_status()
            content = resp.json()["choices"][0]["message"]["content"]
            if not isinstance(content, str):
                raise TypeError("message content is not a string")
        except requests.RequestException as e:
            print(f" HTTP error: {e} (attempt {attempt}/{MAX_RETRIES})")
            if retries_left:
                time.sleep(RETRY_DELAY)
            continue
        except (ValueError, KeyError, IndexError, TypeError) as e:
            # Body was not JSON or did not have the expected
            # choices[0].message.content shape; treat it as a failed attempt
            # instead of letting the exception abort the whole run.
            print(f" Unexpected response format: {e!r} (attempt {attempt}/{MAX_RETRIES})")
            if retries_left:
                time.sleep(RETRY_DELAY)
            continue

        return _strip_code_fence(content)

    return None
|
|
|
|
|
|
def translate_batch(api_key: str, batch: dict, target_language: str) -> Optional[dict]:
    """
    Translates a batch of German alarm entries into the target language.

    Input:  { "AlarmKey": { "Explanation": "...", "Causes": [...], "NextSteps": [...] } }
    Output: same structure in target language.

    Args:
        api_key: Mistral API key forwarded to call_mistral.
        batch: Mapping of alarm key to its German entry.
        target_language: Language name inserted into the prompt (e.g. "French").

    Returns:
        The parsed JSON object from the model, or None when the API call
        failed, the reply was not valid JSON, or the reply was valid JSON but
        not an object.
    """
    prompt = f"""You are translating battery energy storage system alarm descriptions from German into {target_language}.
The source content has been reviewed by field engineers and is accurate.
Translate faithfully — keep the same number of bullet points, same meaning, plain language for homeowners.

Input JSON (German):
{json.dumps(batch, ensure_ascii=False, indent=2)}

Return ONLY a valid JSON object with the same alarm keys. Each value must have exactly:
{{
"Explanation": "translated explanation (1 sentence)",
"Causes": ["translated cause 1", ...],
"NextSteps": ["translated step 1", ...]
}}

Reply with ONLY the JSON object, no markdown, no extra text."""

    raw = call_mistral(api_key, prompt)
    if raw is None:
        return None

    try:
        parsed = json.loads(raw)
    except json.JSONDecodeError as e:
        print(f" JSON parse error: {e}")
        print(f" Raw (first 300 chars): {raw[:300]}")
        return None

    # A list or bare string is valid JSON but not the keyed structure callers
    # index into; report it as a failed batch rather than returning it.
    if not isinstance(parsed, dict):
        print(f" JSON parse error: expected a JSON object, got {type(parsed).__name__}")
        print(f" Raw (first 300 chars): {raw[:300]}")
        return None

    return parsed
|
|
|
|
|
|
# ── AlarmKnowledgeBase.cs generation ────────────────────────────────────────
|
|
|
|
def parse_kb_key_sections(filepath: str) -> dict:
    """
    Map every alarm key found in AlarmKnowledgeBase.cs to its section name.

    The file is scanned for the ``SinexcelAlarms`` and ``GrowattAlarms``
    dictionary initializers; every ``["Key"] = new()`` entry inside one of
    them yields ``key -> "Sinexcel"`` or ``key -> "Growatt"``. Sinexcel keys
    are inserted first, each section in file order. A section that cannot be
    located contributes no keys.
    """
    with open(filepath, "r", encoding="utf-8") as handle:
        source = handle.read()

    flags = re.DOTALL | re.MULTILINE
    entry_pattern = re.compile(r'\["(\w+)"\]\s*=\s*new\(\)')
    section_patterns = (
        ("Sinexcel", r'SinexcelAlarms\s*=\s*new Dictionary.*?\{(.*?)^\s*\};'),
        ("Growatt", r'GrowattAlarms\s*=\s*new Dictionary.*?\{(.*?)^\s*\};'),
    )

    key_to_section = {}
    for section_name, section_pattern in section_patterns:
        found = re.search(section_pattern, source, flags)
        if found is None:
            continue
        # Group 1 is the initializer body between the braces.
        for alarm_key in entry_pattern.findall(found.group(1)):
            key_to_section[alarm_key] = section_name
    return key_to_section
|
|
|
|
|
|
def cs_escape(s: str) -> str:
    """Escapes a string for use inside a C# double-quoted string literal.

    Backslashes and double quotes are backslash-escaped. Line breaks, tabs,
    other control characters and the Unicode line separators U+0085, U+2028
    and U+2029 are emitted as escape sequences, because a raw line terminator
    inside a regular C# string literal does not compile. All other characters
    (including non-ASCII text) pass through unchanged.
    """
    named_escapes = {
        "\\": "\\\\",
        '"': '\\"',
        "\n": "\\n",
        "\r": "\\r",
        "\t": "\\t",
    }
    pieces = []
    for ch in s:
        if ch in named_escapes:
            pieces.append(named_escapes[ch])
        elif ord(ch) < 0x20 or ch in "\x7f\x85\u2028\u2029":
            # Remaining control characters and Unicode line terminators.
            pieces.append(f"\\u{ord(ch):04x}")
        else:
            pieces.append(ch)
    return "".join(pieces)
|
|
|
|
|
|
def write_knowledge_base_cs(filepath: str, en_translations: dict, key_sections: dict):
    """
    Writes an updated AlarmKnowledgeBase.cs using the new English translations,
    preserving the original Sinexcel/Growatt section structure.

    Args:
        filepath: Destination .cs file; it is overwritten.
        en_translations: Alarm key -> {"Explanation", "Causes", "NextSteps"}.
        key_sections: Alarm key -> "Sinexcel" or "Growatt"; iteration order
            determines the order of entries within each dictionary.

    Keys without a translation are emitted as a ``// [Key] — no translation
    available`` comment instead of a dictionary entry.
    """
    sinexcel_keys = [k for k, s in key_sections.items() if s == "Sinexcel"]
    growatt_keys = [k for k, s in key_sections.items() if s == "Growatt"]

    def string_array(items) -> str:
        # ``new[] { }`` with no elements does not compile in C# (the element
        # type cannot be inferred), so an empty list needs an explicit type.
        if not items:
            return "new string[] { }"
        joined = ",\n ".join(f'"{cs_escape(item)}"' for item in items)
        return f"new[] {{ {joined} }}"

    def entry_block(key: str) -> str:
        entry = en_translations.get(key)
        if not entry:
            return f' // [{key}] — no translation available\n'
        # ``or`` guards against explicit nulls coming back from the model.
        exp = cs_escape(entry.get("Explanation") or "")
        causes = string_array(entry.get("Causes") or [])
        steps = string_array(entry.get("NextSteps") or [])
        return (
            f' ["{key}"] = new()\n'
            f' {{\n'
            f' Explanation = "{exp}",\n'
            f' Causes = {causes},\n'
            f' NextSteps = {steps}\n'
            f' }},\n'
        )

    def dictionary_block(banner: str, name: str, keys: list) -> list:
        # One section: banner comment, declaration, entries, closing brace.
        block = [
            banner,
            "\n",
            f" private static readonly IReadOnlyDictionary<string, DiagnosticResponse> {name} = new Dictionary<string, DiagnosticResponse>\n",
            " {\n",
        ]
        block.extend(entry_block(key) for key in keys)
        block.append(" };\n")
        return block

    lines = [
        "namespace InnovEnergy.App.Backend.Services;\n",
        "\n",
        "/// <summary>\n",
        "/// Static knowledge base for Sinexcel and Growatt alarms.\n",
        "/// Provides pre-defined diagnostics without requiring Mistral API calls.\n",
        "/// Updated by generate_alarm_translations.py after the review campaign.\n",
        "/// </summary>\n",
        "public static class AlarmKnowledgeBase\n",
        "{\n",
        " public static DiagnosticResponse? TryGetDiagnosis(string alarmDescription)\n",
        " {\n",
        " if (string.IsNullOrWhiteSpace(alarmDescription)) return null;\n",
        " var normalized = alarmDescription.Trim();\n",
        " if (SinexcelAlarms.TryGetValue(normalized, out var s)) return s;\n",
        " if (GrowattAlarms.TryGetValue(normalized, out var g)) return g;\n",
        " var lower = normalized.ToLowerInvariant();\n",
        " foreach (var kvp in SinexcelAlarms) if (kvp.Key.ToLowerInvariant() == lower) return kvp.Value;\n",
        " foreach (var kvp in GrowattAlarms) if (kvp.Key.ToLowerInvariant() == lower) return kvp.Value;\n",
        " return null;\n",
        " }\n",
        "\n",
    ]
    lines.extend(dictionary_block(
        " // ── Sinexcel Alarms ──────────────────────────────────────────────────────\n",
        "SinexcelAlarms",
        sinexcel_keys,
    ))
    lines.append("\n")
    lines.extend(dictionary_block(
        " // ── Growatt Alarms ───────────────────────────────────────────────────────\n",
        "GrowattAlarms",
        growatt_keys,
    ))
    lines.append("}\n")

    with open(filepath, "w", encoding="utf-8") as f:
        f.writelines(lines)
    print(f" ✓ Wrote updated AlarmKnowledgeBase.cs ({len(sinexcel_keys)} Sinexcel + {len(growatt_keys)} Growatt keys)")
|
|
|
|
|
|
# ── Main ────────────────────────────────────────────────────────────────────
|
|
|
|
def load_env_file(env_path: str) -> dict:
    """Parse a simple ``KEY=value`` .env file into a dict.

    Blank lines, ``#`` comment lines and lines without ``=`` are ignored.
    Keys and values are whitespace-trimmed, an optional leading ``export ``
    is dropped from the key, and one pair of matching single or double
    quotes around the value is removed. A missing file yields an empty dict.
    """
    env = {}
    try:
        # utf-8-sig also tolerates a BOM written by some editors.
        with open(env_path, encoding="utf-8-sig") as f:
            for raw_line in f:
                line = raw_line.strip()
                if not line or line.startswith("#") or "=" not in line:
                    continue
                key, _, value = line.partition("=")
                key = key.strip()
                if key.startswith("export "):
                    key = key[len("export "):].strip()
                value = value.strip()
                # KEY="value" is common in .env files; the quotes are not
                # part of the value.
                if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
                    value = value[1:-1]
                env[key] = value
    except FileNotFoundError:
        # The .env file is optional; callers fall back to an empty mapping.
        pass
    return env
|
|
|
|
|
|
def _translate_language(api_key: str, german_source: dict, alarm_keys: list, lang_name: str):
    """Translate every alarm into one language; return (translations, failed_keys)."""
    translations = {}
    failed_keys = []

    batches = [
        {k: german_source[k] for k in alarm_keys[i:i + BATCH_SIZE]}
        for i in range(0, len(alarm_keys), BATCH_SIZE)
    ]

    for batch_num, batch in enumerate(batches, 1):
        keys_in_batch = list(batch)
        print(f" Batch {batch_num}/{len(batches)}: {', '.join(keys_in_batch)}")

        result = translate_batch(api_key, batch, lang_name)

        if not isinstance(result, dict):
            print(f" FAILED batch {batch_num} — marking keys as failed")
            failed_keys.extend(keys_in_batch)
            continue

        for key in keys_in_batch:
            entry = result.get(key)
            if not isinstance(entry, dict):
                # The model may drop a key or return a bare string/list for
                # it; record the key as failed instead of crashing.
                reason = "malformed in" if key in result else "missing from"
                print(f" WARNING: key '{key}' {reason} batch result")
                failed_keys.append(key)
                continue
            translations[key] = {
                "Explanation": entry.get("Explanation", ""),
                "Causes": entry.get("Causes", []),
                "NextSteps": entry.get("NextSteps", []),
            }

        # Short pause between successful batches.
        if batch_num < len(batches):
            time.sleep(1)

    return translations, failed_keys


def main():
    """Run the full pipeline: copy reviewed German, translate, regenerate the C# file.

    Reads the API key from the MISTRAL_API_KEY environment variable or, as a
    fallback, from a ``.env`` file next to this script. Exits with status 1
    when the key or the reviewed German file is missing, or when any
    translation failed (output files are still written in that case).
    """
    api_key = os.environ.get("MISTRAL_API_KEY", "").strip()
    if not api_key:
        script_dir = os.path.dirname(os.path.abspath(__file__))
        api_key = load_env_file(os.path.join(script_dir, ".env")).get("MISTRAL_API_KEY", "").strip()

    if not api_key:
        print("ERROR: MISTRAL_API_KEY not found in environment or .env file.")
        sys.exit(1)

    print("MISTRAL_API_KEY loaded.")

    # Load reviewed German source
    if not os.path.exists(CHECKED_FILE):
        print(f"ERROR: {CHECKED_FILE} not found. Run the review campaign first.")
        sys.exit(1)

    with open(CHECKED_FILE, "r", encoding="utf-8") as f:
        german_source = json.load(f)

    if not isinstance(german_source, dict):
        print(f"ERROR: {CHECKED_FILE} must contain a JSON object keyed by alarm name.")
        sys.exit(1)

    alarm_keys = list(german_source.keys())
    print(f"Loaded {len(alarm_keys)} alarms from {CHECKED_FILE}.")

    # Step 1: copy reviewed German as the new de.json
    de_out = os.path.join(RESOURCES_DIR, "AlarmTranslations.de.json")
    shutil.copy(CHECKED_FILE, de_out)
    print(f"\n✓ Copied reviewed German → {de_out}")

    # Step 2: translate to en, fr, it
    all_translations = {}  # lang_code → {key → entry}
    total_failed = 0
    for lang_code, lang_name in TARGET_LANGUAGES.items():
        print(f"\n── Translating to {lang_name} ({lang_code}) ──")

        translations, failed_keys = _translate_language(api_key, german_source, alarm_keys, lang_name)
        total_failed += len(failed_keys)

        all_translations[lang_code] = translations
        out_file = os.path.join(RESOURCES_DIR, f"AlarmTranslations.{lang_code}.json")
        with open(out_file, "w", encoding="utf-8") as f:
            json.dump(translations, f, ensure_ascii=False, indent=2)
        print(f" ✓ Wrote {len(translations)} entries → {out_file}")

        if failed_keys:
            print(f" ⚠ Failed keys ({len(failed_keys)}): {failed_keys}")

    # Step 3: update AlarmKnowledgeBase.cs with the new English back-translation
    print("\n── Updating AlarmKnowledgeBase.cs ──")
    if "en" in all_translations and os.path.exists(KNOWLEDGE_BASE):
        key_sections = parse_kb_key_sections(KNOWLEDGE_BASE)
        if key_sections:
            write_knowledge_base_cs(KNOWLEDGE_BASE, all_translations["en"], key_sections)
        else:
            # Rewriting with an empty key map would replace the existing
            # knowledge base with two empty dictionaries.
            print(" Skipped — no alarm keys could be parsed from AlarmKnowledgeBase.cs; file left untouched.")
    else:
        print(" Skipped — en.json not generated or AlarmKnowledgeBase.cs not found.")

    if total_failed:
        print(f"\n⚠ Finished with {total_failed} failed translation(s). Re-run or fix the missing entries before deploying.")
        sys.exit(1)

    print("\n✓ Done. Review the output files before deploying.")
    print(" Next: cd csharp/App/Backend && dotnet build && ./deploy.sh")
|
|
|
|
|
|
# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|