#!/usr/bin/env python3
"""
|
||
generate_alarm_translations.py
|
||
|
||
One-time script: reads AlarmKnowledgeBase.cs, calls Mistral API to translate
|
||
all alarm entries into German (de), French (fr), and Italian (it), and writes:
|
||
|
||
Resources/AlarmTranslations.de.json ← backend uses these at startup
|
||
Resources/AlarmTranslations.fr.json
|
||
Resources/AlarmTranslations.it.json
|
||
Resources/AlarmNames.de.json ← frontend lang file additions
|
||
Resources/AlarmNames.fr.json
|
||
Resources/AlarmNames.it.json
|
||
|
||
Usage:
|
||
export MISTRAL_API_KEY=your_key_here
|
||
python3 generate_alarm_translations.py
|
||
|
||
Output files can be reviewed/edited before committing.
|
||
"""
|
||
|
||
import re
|
||
import json
|
||
import os
|
||
import sys
|
||
import time
|
||
from typing import Optional
|
||
import requests
|
||
|
||
# ── Config ─────────────────────────────────────────────────────────────────
|
||
|
||
# Input C# source and output directory (both relative to the current working
# directory when the script is run).
KNOWLEDGE_BASE_FILE = "Services/AlarmKnowledgeBase.cs"
RESOURCES_DIR = "Resources"

# Mistral chat-completions endpoint and model used for every translation call.
MISTRAL_URL = "https://api.mistral.ai/v1/chat/completions"
MISTRAL_MODEL = "mistral-small-latest"

BATCH_SIZE = 3  # alarms per API call — smaller = less chance of token truncation
RETRY_DELAY = 5  # base retry delay in seconds (multiplied by the attempt number on HTTP 429)
MAX_RETRIES = 3  # attempts per API call before the batch is given up
REQUEST_TIMEOUT = (10, 90)  # (connect_timeout, read_timeout) in seconds

# Target languages: language code used in output file names → language name
# inserted into the translation prompt.
LANGUAGES = {
    "de": "German",
    "fr": "French",
    "it": "Italian",
}
|
||
|
||
# ── Parsing ─────────────────────────────────────────────────────────────────
|
||
|
||
def split_camel_case(name: str) -> str:
    """Insert spaces at the word boundaries of a PascalCase/camelCase identifier.

    A boundary is recognised
      * between a lowercase letter or digit and a following uppercase letter, and
      * between an acronym and the capitalised word that follows it.

    'AbnormalGridVoltage' → 'Abnormal Grid Voltage'
    'PVOverVoltage'       → 'PV Over Voltage'
    'Pv1Overvoltage'      → 'Pv1 Overvoltage'
    """
    return re.sub(
        # The digit in the first look-behind keeps numbered identifiers such as
        # 'Pv1Overvoltage' from being left as a single unsplit word.
        r'(?<=[a-z0-9])(?=[A-Z])|(?<=[A-Z])(?=[A-Z][a-z])',
        ' ',
        name,
    ).strip()
|
||
|
||
|
||
# Body of a regular (non-verbatim) C# string literal, without the quotes.
_CSHARP_STRING_RE = re.compile(r'"((?:[^"\\]|\\.)*)"')

# One backslash escape inside such a literal: \uXXXX or a single character.
_CSHARP_ESCAPE_RE = re.compile(r'\\(u[0-9A-Fa-f]{4}|.)', re.DOTALL)

_CSHARP_SIMPLE_ESCAPES = {
    '"': '"',
    "'": "'",
    "\\": "\\",
    "0": "\0",
    "a": "\a",
    "b": "\b",
    "f": "\f",
    "n": "\n",
    "r": "\r",
    "t": "\t",
    "v": "\v",
}


def _unescape_csharp_string(literal_body: str) -> str:
    """Decode the backslash escapes of a regular C# string literal body."""

    def _decode(match):
        token = match.group(1)
        if len(token) == 5 and token[0] == "u":
            return chr(int(token[1:], 16))
        # Unknown escapes are left exactly as written.
        return _CSHARP_SIMPLE_ESCAPES.get(token, match.group(0))

    return _CSHARP_ESCAPE_RE.sub(_decode, literal_body)


def _extract_string_array(block: str, field: str) -> list:
    """Return the decoded strings of `<field> = new[] { "...", ... }` in block."""
    # NOTE: `[^}]+` stops at the first '}' — a '}' inside one of the strings
    # would truncate the array.
    section = re.search(field + r'\s*=\s*new\[\]\s*\{([^}]+)\}', block, re.DOTALL)
    if section is None:
        return []
    return [
        _unescape_csharp_string(raw)
        for raw in _CSHARP_STRING_RE.findall(section.group(1))
    ]


def parse_knowledge_base(filepath: str) -> dict:
    """
    Parses AlarmKnowledgeBase.cs and returns a dict:
    { "AlarmKey": { "Explanation": "...", "Causes": [...], "NextSteps": [...] } }

    Each alarm is located by its `["Key"] = new()` declaration; the text up to
    the next declaration (or end of file) is searched for the three fields.
    C# escape sequences in the extracted strings (\\", \\\\, \\n, \\uXXXX, ...)
    are decoded so the returned text is the actual string value rather than
    its source-code spelling. Keys for which none of the three fields could be
    extracted are omitted.

    Raises OSError (e.g. FileNotFoundError) if the file cannot be opened.
    """
    with open(filepath, "r", encoding="utf-8") as f:
        content = f.read()

    alarms = {}

    # Find positions of all alarm key declarations: ["Key"] = new()
    key_matches = list(re.finditer(r'\["(\w+)"\]\s*=\s*new\(\)', content))

    for i, key_match in enumerate(key_matches):
        key = key_match.group(1)
        start = key_match.start()
        end = key_matches[i + 1].start() if i + 1 < len(key_matches) else len(content)
        block = content[start:end]

        # Explanation (single string)
        exp_match = re.search(r'Explanation\s*=\s*"((?:[^"\\]|\\.)*)"', block)
        explanation = _unescape_csharp_string(exp_match.group(1)) if exp_match else ""

        # Causes / NextSteps (string arrays)
        causes = _extract_string_array(block, "Causes")
        next_steps = _extract_string_array(block, "NextSteps")

        if explanation or causes or next_steps:
            alarms[key] = {
                "Explanation": explanation,
                "Causes": causes,
                "NextSteps": next_steps,
            }

    return alarms
|
||
|
||
|
||
# ── Mistral API ─────────────────────────────────────────────────────────────
|
||
|
||
def _strip_code_fences(text: str) -> str:
    """Remove a surrounding Markdown code fence (``` ... ```) from text, if any."""
    text = text.strip()
    if text.startswith("```"):
        first_newline = text.find("\n")
        if first_newline == -1:
            # Single-line fenced reply: there is no "```lang" line to drop,
            # only the three backticks themselves.
            text = text[3:]
        else:
            text = text[first_newline + 1:]
    if text.endswith("```"):
        text = text[:-3]
    return text.strip()


def call_mistral(api_key: str, prompt: str) -> Optional[str]:
    """
    Sends a single-message chat completion request to the Mistral API.

    Makes up to MAX_RETRIES attempts. HTTP 429 responses wait
    RETRY_DELAY * attempt seconds before retrying; other request failures and
    malformed response payloads wait RETRY_DELAY seconds. No wait happens
    after the final attempt.

    Returns the reply text with any surrounding Markdown code fence removed,
    or None if every attempt failed.
    """
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }
    body = {
        "model": MISTRAL_MODEL,
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": 1400,  # ~3 alarms × ~450 tokens each (German is verbose)
        "temperature": 0.1,  # low for consistent translations
    }

    for attempt in range(1, MAX_RETRIES + 1):
        has_more_attempts = attempt < MAX_RETRIES
        try:
            resp = requests.post(MISTRAL_URL, headers=headers, json=body, timeout=REQUEST_TIMEOUT)
            if resp.status_code == 429:
                delay = RETRY_DELAY * attempt
                print(f" Rate limited, waiting {delay}s (attempt {attempt}/{MAX_RETRIES})...")
                if has_more_attempts:
                    time.sleep(delay)
                continue
            resp.raise_for_status()
            data = resp.json()
            content = data["choices"][0]["message"]["content"]
            if not isinstance(content, str):
                raise TypeError(f"message content is {type(content).__name__}, expected str")
        except requests.RequestException as e:
            print(f" HTTP error: {e} (attempt {attempt}/{MAX_RETRIES})")
        except (ValueError, KeyError, IndexError, TypeError) as e:
            # Body was not JSON, or did not have the expected
            # choices[0].message.content string.
            print(f" Unexpected response: {e!r} (attempt {attempt}/{MAX_RETRIES})")
        else:
            return _strip_code_fences(content)

        if has_more_attempts:
            time.sleep(RETRY_DELAY)

    return None
|
||
|
||
|
||
def translate_batch(api_key: str, batch: dict, language_name: str) -> Optional[dict]:
    """
    Translates a batch of alarms into the target language.

    batch maps alarm key → {"Explanation", "Causes", "NextSteps"} (English).
    The alarm keys are sent to the model together with an "EnglishName"
    derived from each key, and the model is asked to answer with a JSON
    object using the same keys.

    Returns a dict of alarm key → translated entry (including a localized
    "Name"), containing only the entries the model returned as JSON objects.
    Returns None if the API call failed or the reply was not a JSON object.
    """
    # Build the input JSON: the English content of each alarm under its key.
    input_data = {}
    for key, entry in batch.items():
        english_name = split_camel_case(key)
        input_data[key] = {
            "EnglishName": english_name,
            "Explanation": entry["Explanation"],
            "Causes": entry["Causes"],
            "NextSteps": entry["NextSteps"],
        }

    prompt = f"""You are translating battery energy storage system alarm descriptions into {language_name}.
Translate each alarm entry. The "Name" should be a short (2-5 word) localized display title for the alarm.
Keep technical terms accurate but use plain language a homeowner would understand.

Input JSON:
{json.dumps(input_data, ensure_ascii=False, indent=2)}

Return ONLY a valid JSON object with the same alarm keys. Each value must have exactly these fields:
{{
"Name": "short {language_name} title",
"Explanation": "translated explanation sentence",
"Causes": ["translated cause 1", "translated cause 2"],
"NextSteps": ["translated step 1", "translated step 2"]
}}

Reply with ONLY the JSON object, no markdown, no extra text."""

    raw = call_mistral(api_key, prompt)
    if raw is None:
        return None

    try:
        result = json.loads(raw)
    except json.JSONDecodeError as e:
        print(f" JSON parse error: {e}")
        print(f" Raw response (first 300 chars): {raw[:300]}")
        return None

    # Valid JSON is not necessarily the requested shape: callers index the
    # result by alarm key and call .get() on each entry.
    if not isinstance(result, dict):
        print(f" Unexpected JSON type: {type(result).__name__} (expected an object)")
        print(f" Raw response (first 300 chars): {raw[:300]}")
        return None

    valid_entries = {}
    for key, entry in result.items():
        if isinstance(entry, dict):
            valid_entries[key] = entry
        else:
            print(f" WARNING: entry for '{key}' is not a JSON object — ignored")
    return valid_entries
|
||
|
||
|
||
# ── Main ────────────────────────────────────────────────────────────────────
|
||
|
||
def load_env_file(env_path: str) -> dict:
    """Parse a simple KEY=VALUE .env file.

    Blank lines, lines starting with '#', and lines without '=' are ignored.
    A leading 'export ' is accepted (shell-style `export KEY=value`), and one
    pair of matching single or double quotes around the value is removed.
    Only the first '=' separates key from value.

    Returns a dict of key → value; an empty dict if the file does not exist.
    """
    env = {}
    try:
        # utf-8-sig also reads plain UTF-8 and drops a leading BOM if present.
        with open(env_path, encoding="utf-8-sig") as f:
            for line in f:
                line = line.strip()
                if not line or line.startswith("#") or "=" not in line:
                    continue
                if line.startswith("export "):
                    line = line[len("export "):].lstrip()

                k, _, v = line.partition("=")
                k = k.strip()
                v = v.strip()
                if len(v) >= 2 and v[0] == v[-1] and v[0] in ('"', "'"):
                    v = v[1:-1]
                if k:
                    env[k] = v
    except FileNotFoundError:
        # A missing .env file is not an error: the caller treats it as "no values".
        pass
    return env
|
||
|
||
|
||
def main():
    """Translate every parsed alarm into each configured language and write JSON files.

    Steps:
      1. Read MISTRAL_API_KEY from the environment, falling back to a `.env`
         file next to this script; exit with status 1 if neither provides it.
      2. Parse KNOWLEDGE_BASE_FILE; exit with status 1 if it cannot be read
         or yields no alarms.
      3. For each entry in LANGUAGES, translate the alarms in batches of
         BATCH_SIZE and write AlarmTranslations.<code>.json and
         AlarmNames.<code>.json into RESOURCES_DIR.

    Alarms whose batch failed, or that are missing or malformed in the model
    reply, are left out of the output files and listed as failed keys.
    """
    # Try environment variable first, then .env file in the same directory
    api_key = os.environ.get("MISTRAL_API_KEY", "").strip()
    if not api_key:
        script_dir = os.path.dirname(os.path.abspath(__file__))
        env_vars = load_env_file(os.path.join(script_dir, ".env"))
        api_key = env_vars.get("MISTRAL_API_KEY", "").strip()

    if not api_key:
        print("ERROR: MISTRAL_API_KEY not found in environment or .env file.")
        sys.exit(1)

    print("MISTRAL_API_KEY loaded.")

    # Parse knowledge base
    print(f"Parsing {KNOWLEDGE_BASE_FILE}...")
    try:
        alarms = parse_knowledge_base(KNOWLEDGE_BASE_FILE)
    except OSError as e:
        # Missing or unreadable file: report it instead of dumping a traceback.
        print(f"ERROR: Cannot read {KNOWLEDGE_BASE_FILE}: {e}")
        sys.exit(1)
    print(f" Found {len(alarms)} alarm entries.")

    if not alarms:
        print("ERROR: No alarms parsed. Check the file path and format.")
        sys.exit(1)

    alarm_keys = list(alarms.keys())
    os.makedirs(RESOURCES_DIR, exist_ok=True)

    # Process each language
    for lang_code, lang_name in LANGUAGES.items():
        print(f"\n── Translating to {lang_name} ({lang_code}) ──")

        translations = {}  # key → {Explanation, Causes, NextSteps}
        alarm_name_keys = {}  # "alarm_Key" → translated name (for lang JSON files)
        failed_keys = []

        # Split into batches
        batches = [
            {k: alarms[k] for k in alarm_keys[i:i + BATCH_SIZE]}
            for i in range(0, len(alarm_keys), BATCH_SIZE)
        ]

        for batch_num, batch in enumerate(batches, 1):
            keys_in_batch = list(batch.keys())
            print(f" Batch {batch_num}/{len(batches)}: {', '.join(keys_in_batch)}")

            result = translate_batch(api_key, batch, lang_name)

            # None means the call failed; any other non-dict cannot be indexed by key.
            if not isinstance(result, dict):
                print(f" FAILED batch {batch_num} — will mark keys as failed")
                failed_keys.extend(keys_in_batch)
                continue

            for key in keys_in_batch:
                if key not in result:
                    print(f" WARNING: key '{key}' missing from batch result")
                    failed_keys.append(key)
                    continue

                entry = result[key]
                if not isinstance(entry, dict):
                    print(f" WARNING: entry for key '{key}' is not a JSON object")
                    failed_keys.append(key)
                    continue

                translations[key] = {
                    "Explanation": entry.get("Explanation", ""),
                    "Causes": entry.get("Causes", []),
                    "NextSteps": entry.get("NextSteps", []),
                }

                # Fall back to the English name when the model omitted the
                # Name or returned an empty / non-string value.
                name = entry.get("Name")
                if not isinstance(name, str) or not name.strip():
                    name = split_camel_case(key)
                alarm_name_keys[f"alarm_{key}"] = name

            # Small pause between batches to avoid rate limits
            if batch_num < len(batches):
                time.sleep(1)

        # Write backend translation file
        backend_file = os.path.join(RESOURCES_DIR, f"AlarmTranslations.{lang_code}.json")
        with open(backend_file, "w", encoding="utf-8") as f:
            json.dump(translations, f, ensure_ascii=False, indent=2)
        print(f" Wrote {len(translations)} entries → {backend_file}")

        # Write frontend alarm name file (to be merged into lang JSON)
        names_file = os.path.join(RESOURCES_DIR, f"AlarmNames.{lang_code}.json")
        with open(names_file, "w", encoding="utf-8") as f:
            json.dump(alarm_name_keys, f, ensure_ascii=False, indent=2)
        print(f" Wrote {len(alarm_name_keys)} name keys → {names_file}")

        if failed_keys:
            print(f" FAILED keys ({len(failed_keys)}): {failed_keys}")

    print("\n✓ Done. Review the output files in Resources/ before committing.")
    print(" Next: merge AlarmNames.*.json entries into src/lang/de.json, fr.json, it.json")
|
||
|
||
|
||
# Run the translation job only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|