Innovenergy_trunk/csharp/App/Backend/generate_alarm_translations.py

285 lines
11 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
generate_alarm_translations.py

One-time script: reads AlarmKnowledgeBase.cs, calls the Mistral API to translate
all alarm entries into German (de), French (fr), and Italian (it), and writes:

    Resources/AlarmTranslations.de.json   ← backend uses these at startup
    Resources/AlarmTranslations.fr.json
    Resources/AlarmTranslations.it.json
    Resources/AlarmNames.de.json          ← frontend lang file additions
    Resources/AlarmNames.fr.json
    Resources/AlarmNames.it.json

Usage:
    export MISTRAL_API_KEY=your_key_here
    python3 generate_alarm_translations.py

Output files can be reviewed/edited before committing.
"""
import re
import json
import os
import sys
import time
from typing import Optional
import requests
# ── Config ─────────────────────────────────────────────────────────────────
# Input C# source and output directory (both relative paths).
KNOWLEDGE_BASE_FILE = "Services/AlarmKnowledgeBase.cs"
RESOURCES_DIR = "Resources"

# Mistral chat-completions endpoint and the model requested from it.
MISTRAL_URL = "https://api.mistral.ai/v1/chat/completions"
MISTRAL_MODEL = "mistral-small-latest"

# Alarms per API call — smaller = less chance of token truncation.
BATCH_SIZE = 3

# Base delay in seconds between retries, and the number of attempts per call.
RETRY_DELAY = 5
MAX_RETRIES = 3

# (connect_timeout, read_timeout) in seconds.
REQUEST_TIMEOUT = (10, 90)

# Target languages: language code -> English name of the language.
LANGUAGES = dict(de="German", fr="French", it="Italian")
# ── Parsing ─────────────────────────────────────────────────────────────────
def split_camel_case(name: str) -> str:
    """Insert spaces at camel-case word boundaries.

    Example: 'AbnormalGridVoltage' -> 'Abnormal Grid Voltage'.
    """
    # A boundary sits either between a lowercase and an uppercase letter, or
    # before the last capital of an uppercase run that is followed by a
    # lowercase letter (so 'BMSFault' becomes 'BMS Fault').
    boundary = re.compile(r'(?<=[a-z])(?=[A-Z])|(?<=[A-Z])(?=[A-Z][a-z])')
    spaced = boundary.sub(' ', name)
    return spaced.strip()
def _parse_string_array(field: str, block: str) -> list:
    """Return the string literals of the `<field> = new[] { ... }` array in block.

    Returns an empty list when the array is absent.
    """
    # The array body is matched literal-by-literal so that a '}' inside a
    # quoted string does not terminate the array early.
    pattern = field + r'\s*=\s*new\[\]\s*\{((?:[^}"]|"(?:[^"\\]|\\.)*")+)\}'
    section = re.search(pattern, block, re.DOTALL)
    if section is None:
        return []
    return re.findall(r'"((?:[^"\\]|\\.)*)"', section.group(1))


def parse_knowledge_base(filepath: str) -> dict:
    """
    Parse AlarmKnowledgeBase.cs and return its alarm entries.

    Args:
        filepath: Path of the C# source file (read as UTF-8).

    Returns:
        { "AlarmKey": { "Explanation": "...", "Causes": [...], "NextSteps": [...] } }
        Entries for which none of the three fields could be extracted are
        omitted. String contents are returned exactly as written in the
        source, i.e. C# escape sequences such as \\" are NOT unescaped.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(filepath, "r", encoding="utf-8") as f:
        content = f.read()

    alarms = {}
    # Find positions of all alarm key declarations: ["Key"] = new()
    key_matches = list(re.finditer(r'\["(\w+)"\]\s*=\s*new\(\)', content))
    for i, key_match in enumerate(key_matches):
        key = key_match.group(1)
        # An entry's text runs from its own declaration up to the next
        # declaration (or to the end of the file for the last entry).
        start = key_match.start()
        end = key_matches[i + 1].start() if i + 1 < len(key_matches) else len(content)
        block = content[start:end]

        # Explanation (single string)
        exp_match = re.search(r'Explanation\s*=\s*"((?:[^"\\]|\\.)*)"', block)
        explanation = exp_match.group(1) if exp_match else ""

        # Causes / NextSteps (string arrays)
        causes = _parse_string_array("Causes", block)
        next_steps = _parse_string_array("NextSteps", block)

        if explanation or causes or next_steps:
            alarms[key] = {
                "Explanation": explanation,
                "Causes": causes,
                "NextSteps": next_steps,
            }
    return alarms
# ── Mistral API ─────────────────────────────────────────────────────────────
def _strip_code_fences(text: str) -> str:
    """Remove a surrounding markdown code fence (``` ... ```) from text, if present."""
    if text.startswith("```"):
        _, newline, rest = text.partition("\n")
        if newline:
            # Usual case: the whole first line is the fence (plus language tag).
            text = rest
        else:
            # Single-line reply: drop the fence and an optional language tag.
            text = re.sub(r"^```[A-Za-z]*", "", text)
    if text.endswith("```"):
        text = text[:-3].strip()
    return text


def call_mistral(api_key: str, prompt: str) -> Optional[str]:
    """
    Send a single-message chat completion request to the Mistral API.

    Args:
        api_key: Bearer token for the Authorization header.
        prompt: Text sent as the only user message.

    Returns:
        The reply text with surrounding whitespace and markdown code fences
        removed, or None if no usable reply was obtained within MAX_RETRIES
        attempts (rate limiting, HTTP/network errors, or an unexpected
        response structure).
    """
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }
    body = {
        "model": MISTRAL_MODEL,
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": 1400,  # ~3 alarms × ~450 tokens each (German is verbose)
        "temperature": 0.1,  # low for consistent translations
    }
    for attempt in range(1, MAX_RETRIES + 1):
        # Sleeping after the final attempt would only delay the failure.
        retries_left = attempt < MAX_RETRIES
        try:
            resp = requests.post(MISTRAL_URL, headers=headers, json=body, timeout=REQUEST_TIMEOUT)
            if resp.status_code == 429:
                if not retries_left:
                    print(f" Rate limited, giving up (attempt {attempt}/{MAX_RETRIES}).")
                    break
                # Linear back-off; report the delay that is actually applied.
                delay = RETRY_DELAY * attempt
                print(f" Rate limited, waiting {delay}s (attempt {attempt}/{MAX_RETRIES})...")
                time.sleep(delay)
                continue
            resp.raise_for_status()
            data = resp.json()
        except (requests.RequestException, ValueError) as e:
            # ValueError covers a non-JSON body on requests versions whose
            # JSON error is not a RequestException subclass.
            print(f" HTTP error: {e} (attempt {attempt}/{MAX_RETRIES})")
            if retries_left:
                time.sleep(RETRY_DELAY)
            continue

        try:
            content = data["choices"][0]["message"]["content"].strip()
        except (KeyError, IndexError, TypeError, AttributeError) as e:
            # Missing/empty choices or null content: treat like a failed attempt
            # instead of aborting the whole run.
            print(f" Unexpected response format: {e!r} (attempt {attempt}/{MAX_RETRIES})")
            if retries_left:
                time.sleep(RETRY_DELAY)
            continue

        # Strip markdown code fences if present
        return _strip_code_fences(content)
    return None
def translate_batch(api_key: str, batch: dict, language_name: str) -> Optional[dict]:
    """
    Translate a batch of alarms into the target language.

    Args:
        api_key: Mistral API key, passed through to call_mistral.
        batch: { "AlarmKey": { "Explanation": ..., "Causes": [...], "NextSteps": [...] } }
        language_name: English name of the target language (e.g. "German"),
            inserted into the prompt.

    Returns:
        The parsed JSON object returned by the model (expected to use the same
        alarm keys, each with Name, Explanation, Causes and NextSteps), or
        None if the API call failed or the reply is not a JSON object.
    """
    # Build the input JSON: the English content plus a readable English name
    # derived from the alarm key.
    input_data = {
        key: {
            "EnglishName": split_camel_case(key),
            "Explanation": entry["Explanation"],
            "Causes": entry["Causes"],
            "NextSteps": entry["NextSteps"],
        }
        for key, entry in batch.items()
    }
    prompt = f"""You are translating battery energy storage system alarm descriptions into {language_name}.
Translate each alarm entry. The "Name" should be a short (2-5 word) localized display title for the alarm.
Keep technical terms accurate but use plain language a homeowner would understand.
Input JSON:
{json.dumps(input_data, ensure_ascii=False, indent=2)}
Return ONLY a valid JSON object with the same alarm keys. Each value must have exactly these fields:
{{
"Name": "short {language_name} title",
"Explanation": "translated explanation sentence",
"Causes": ["translated cause 1", "translated cause 2"],
"NextSteps": ["translated step 1", "translated step 2"]
}}
Reply with ONLY the JSON object, no markdown, no extra text."""
    raw = call_mistral(api_key, prompt)
    if raw is None:
        return None
    try:
        result = json.loads(raw)
    except json.JSONDecodeError as e:
        print(f" JSON parse error: {e}")
        print(f" Raw response (first 300 chars): {raw[:300]}")
        return None
    # Valid JSON is not necessarily an object (it may be a list, string or
    # number); callers index the result by alarm key, so reject anything else.
    if not isinstance(result, dict):
        print(f" JSON parse error: expected a JSON object, got {type(result).__name__}")
        print(f" Raw response (first 300 chars): {raw[:300]}")
        return None
    return result
# ── Main ────────────────────────────────────────────────────────────────────
def load_env_file(env_path: str) -> dict:
    """
    Parse a simple KEY=VALUE .env file.

    Blank lines, lines starting with '#', and lines without '=' are ignored.
    A leading 'export ' is dropped, whitespace around keys and values is
    stripped, and one pair of matching single or double quotes around a value
    is removed. Lines with an empty key are skipped.

    Args:
        env_path: Path of the .env file.

    Returns:
        Dict of the parsed variables; empty if the file does not exist.
    """
    env = {}
    try:
        # utf-8-sig also accepts plain UTF-8 and drops a BOM, which would
        # otherwise become part of the first key.
        with open(env_path, encoding="utf-8-sig") as f:
            for raw_line in f:
                line = raw_line.strip()
                if not line or line.startswith("#") or "=" not in line:
                    continue
                if line.startswith("export "):
                    line = line[len("export "):]
                key, _, value = line.partition("=")
                key = key.strip()
                value = value.strip()
                # KEY="value" / KEY='value': the quotes are syntax, not data.
                if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
                    value = value[1:-1]
                if key:
                    env[key] = value
    except FileNotFoundError:
        # A missing .env file is not an error; the caller decides what to do.
        pass
    return env
def _resolve_api_key() -> str:
    """Return MISTRAL_API_KEY from the environment, else from the .env file next to this script ('' if absent)."""
    api_key = os.environ.get("MISTRAL_API_KEY", "").strip()
    if api_key:
        return api_key
    script_dir = os.path.dirname(os.path.abspath(__file__))
    env_vars = load_env_file(os.path.join(script_dir, ".env"))
    return env_vars.get("MISTRAL_API_KEY", "").strip()


def _write_json(path: str, payload: dict) -> None:
    """Write payload to path as indented UTF-8 JSON, keeping non-ASCII characters as-is."""
    with open(path, "w", encoding="utf-8") as f:
        json.dump(payload, f, ensure_ascii=False, indent=2)


def main():
    """
    Translate every alarm of the knowledge base into each configured language.

    For each language two files are written into RESOURCES_DIR:
    AlarmTranslations.<code>.json (Explanation/Causes/NextSteps per alarm key)
    and AlarmNames.<code>.json ("alarm_<Key>" -> translated display name).
    Alarms whose translation failed are left out of both files and listed on
    stdout. Exits with status 1 if the API key is missing or the knowledge
    base cannot be read or contains no alarms.
    """
    # Try environment variable first, then .env file in the same directory
    api_key = _resolve_api_key()
    if not api_key:
        print("ERROR: MISTRAL_API_KEY not found in environment or .env file.")
        sys.exit(1)
    print("MISTRAL_API_KEY loaded.")

    # Parse knowledge base
    print(f"Parsing {KNOWLEDGE_BASE_FILE}...")
    try:
        alarms = parse_knowledge_base(KNOWLEDGE_BASE_FILE)
    except OSError as e:
        # The path is relative, so a wrong working directory lands here.
        print(f"ERROR: Cannot read {KNOWLEDGE_BASE_FILE}: {e}")
        sys.exit(1)
    print(f" Found {len(alarms)} alarm entries.")
    if not alarms:
        print("ERROR: No alarms parsed. Check the file path and format.")
        sys.exit(1)

    alarm_keys = list(alarms)
    os.makedirs(RESOURCES_DIR, exist_ok=True)

    # Split into batches (identical for every language, so built once)
    batches = [
        {k: alarms[k] for k in alarm_keys[i:i + BATCH_SIZE]}
        for i in range(0, len(alarm_keys), BATCH_SIZE)
    ]

    # Process each language
    for lang_code, lang_name in LANGUAGES.items():
        print(f"\n── Translating to {lang_name} ({lang_code}) ──")
        translations = {}     # key → {Explanation, Causes, NextSteps}
        alarm_name_keys = {}  # "alarm_Key" → translated name (for lang JSON files)
        failed_keys = []

        for batch_num, batch in enumerate(batches, 1):
            keys_in_batch = list(batch)
            print(f" Batch {batch_num}/{len(batches)}: {', '.join(keys_in_batch)}")
            result = translate_batch(api_key, batch, lang_name)
            if result is None:
                print(f" FAILED batch {batch_num} — will mark keys as failed")
                failed_keys.extend(keys_in_batch)
                continue

            for key in keys_in_batch:
                if not isinstance(result, dict) or key not in result:
                    print(f" WARNING: key '{key}' missing from batch result")
                    failed_keys.append(key)
                    continue
                entry = result[key]
                # The model may return a string or list for an alarm instead
                # of an object; record that as a failure rather than crashing.
                if not isinstance(entry, dict):
                    print(f" WARNING: key '{key}' has a malformed entry in batch result")
                    failed_keys.append(key)
                    continue
                translations[key] = {
                    "Explanation": entry.get("Explanation", ""),
                    "Causes": entry.get("Causes", []),
                    "NextSteps": entry.get("NextSteps", []),
                }
                # Fall back to the spaced English key if no name was returned.
                alarm_name_keys[f"alarm_{key}"] = entry.get("Name", split_camel_case(key))

            # Small pause between batches to avoid rate limits
            if batch_num < len(batches):
                time.sleep(1)

        # Write backend translation file
        backend_file = os.path.join(RESOURCES_DIR, f"AlarmTranslations.{lang_code}.json")
        _write_json(backend_file, translations)
        print(f" Wrote {len(translations)} entries → {backend_file}")

        # Write frontend alarm name file (to be merged into lang JSON)
        names_file = os.path.join(RESOURCES_DIR, f"AlarmNames.{lang_code}.json")
        _write_json(names_file, alarm_name_keys)
        print(f" Wrote {len(alarm_name_keys)} name keys → {names_file}")

        if failed_keys:
            print(f" FAILED keys ({len(failed_keys)}): {failed_keys}")

    print("\n✓ Done. Review the output files in Resources/ before committing.")
    print(" Next: merge AlarmNames.*.json entries into src/lang/de.json, fr.json, it.json")


if __name__ == "__main__":
    main()