From 9cce0f65aa2a06f0c44579759b5a5504b6cc46f3 Mon Sep 17 00:00:00 2001
From: Hayden <64056131+hay-kot@users.noreply.github.com>
Date: Fri, 4 Jul 2025 19:00:23 -0500
Subject: [PATCH] chore: automatic crowdin sync via gh actions (#5630)

---
 .github/workflows/locale-sync.yml     | 103 ++++++++++++++++++++++++++
 Taskfile.yml                          |   2 +-
 dev/code-generation/gen_ts_locales.py |  16 ++--
 dev/code-generation/main.py           |  41 ++++++++--
 dev/code-generation/utils/template.py |  20 +++--
 5 files changed, 161 insertions(+), 21 deletions(-)
 create mode 100644 .github/workflows/locale-sync.yml

diff --git a/.github/workflows/locale-sync.yml b/.github/workflows/locale-sync.yml
new file mode 100644
index 000000000..f76d1e4bd
--- /dev/null
+++ b/.github/workflows/locale-sync.yml
@@ -0,0 +1,103 @@
+name: Automatic Locale Sync
+
+on:
+  schedule:
+    # Run every Sunday at 2 AM UTC
+    - cron: '0 2 * * 0'
+  workflow_dispatch: # Allow manual triggering from the GitHub UI
+
+permissions: # scopes required to push the sync branch and open the PR
+  contents: write
+  pull-requests: write
+
+jobs:
+  sync-locales:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.12'
+
+      - name: Install Poetry
+        uses: snok/install-poetry@v1
+        with:
+          virtualenvs-create: true
+          virtualenvs-in-project: true
+
+      - name: Load cached venv
+        id: cached-poetry-dependencies
+        uses: actions/cache@v4
+        with:
+          path: .venv
+          key: venv-${{ runner.os }}-${{ hashFiles('**/poetry.lock') }}
+
+      - name: Check venv cache
+        id: cache-validate
+        if: steps.cached-poetry-dependencies.outputs.cache-hit == 'true'
+        run: poetry run python -c "import fastapi" && echo "cache-hit-success=true" >> "$GITHUB_OUTPUT"
+        continue-on-error: true # a broken cache falls through to a fresh install
+
+      - name: Install dependencies
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y libsasl2-dev libldap2-dev libssl-dev
+          poetry install
+        if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true' || steps.cache-validate.outputs.cache-hit-success != 'true'
+
+      - name: Run locale generation
+        run: |
+          cd dev/code-generation
+          poetry run python main.py locales
+        env:
+          CROWDIN_API_KEY: ${{ secrets.CROWDIN_API_KEY }}
+
+      - name: Check for changes
+        id: changes
+        run: |
+          # Porcelain status also lists untracked files, which `git diff` ignores
+          if [ -z "$(git status --porcelain)" ]; then
+            echo "has_changes=false" >> "$GITHUB_OUTPUT"
+          else
+            echo "has_changes=true" >> "$GITHUB_OUTPUT"
+          fi
+
+      - name: Commit and create PR
+        if: steps.changes.outputs.has_changes == 'true'
+        run: |
+          # Configure git
+          git config --local user.email "action@github.com"
+          git config --local user.name "GitHub Action"
+
+          # Create a new branch
+          BRANCH_NAME="auto-locale-sync-$(date +%Y%m%d-%H%M%S)"
+          git checkout -b "$BRANCH_NAME"
+
+          # Add and commit changes
+          git add .
+          git commit -m "chore: automatic locale sync"
+
+          # Push the branch
+          git push origin "$BRANCH_NAME"
+
+          # Create PR using GitHub CLI
+          gh pr create --title "chore: automatic locale sync" --body "## Summary
+
+          Automatically generated locale updates from the weekly sync job.
+
+          ## Changes
+          - Updated frontend locale files
+          - Generated from latest translation sources
+
+          ## Test plan
+          - [ ] Verify locale files are properly formatted
+          - [ ] Test that translations load correctly in the frontend" --base dev --head "$BRANCH_NAME"
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: No changes detected
+        if: steps.changes.outputs.has_changes == 'false'
+        run: echo "No locale changes detected, skipping PR creation"
diff --git a/Taskfile.yml b/Taskfile.yml
index ccff66fd6..c54c075db 100644
--- a/Taskfile.yml
+++ b/Taskfile.yml
@@ -70,7 +70,7 @@ tasks:
   dev:generate:
     desc: run code generators
     cmds:
-      - poetry run python dev/code-generation/main.py
+      - poetry run python dev/code-generation/main.py {{ .CLI_ARGS }} # forwards args given after "--", e.g. task dev:generate -- locales
       - task: py:format
 
   dev:services:
diff --git a/dev/code-generation/gen_ts_locales.py b/dev/code-generation/gen_ts_locales.py
index 6ff441d22..0d7e207ef 100644
--- a/dev/code-generation/gen_ts_locales.py
+++ b/dev/code-generation/gen_ts_locales.py
@@ -23,19 +23,22 @@ class LocaleData:
 
 
 LOCALE_DATA: dict[str, LocaleData] = {
-    "en-US": LocaleData(name="American English"),
-    "en-GB": LocaleData(name="British English"),
     "af-ZA": LocaleData(name="Afrikaans (Afrikaans)"),
     "ar-SA": LocaleData(name="العربية (Arabic)", dir="rtl"),
+    "bg-BG": LocaleData(name="Български (Bulgarian)"),
     "ca-ES": LocaleData(name="Català (Catalan)"),
     "cs-CZ": LocaleData(name="Čeština (Czech)"),
     "da-DK": LocaleData(name="Dansk (Danish)"),
     "de-DE": LocaleData(name="Deutsch (German)"),
     "el-GR": LocaleData(name="Ελληνικά (Greek)"),
+    "en-GB": LocaleData(name="British English"),
+    "en-US": LocaleData(name="American English"),
     "es-ES": LocaleData(name="Español (Spanish)"),
+    "et-EE": LocaleData(name="Eesti (Estonian)"),
     "fi-FI": LocaleData(name="Suomi (Finnish)"),
-    "fr-FR": LocaleData(name="Français (French)"),
     "fr-BE": LocaleData(name="Belge (Belgian)"),
+    "fr-CA": LocaleData(name="Français canadien (Canadian French)"),
+    "fr-FR": LocaleData(name="Français (French)"),
     "gl-ES": LocaleData(name="Galego (Galician)"),
     "he-IL": LocaleData(name="עברית (Hebrew)", dir="rtl"),
     "hr-HR": LocaleData(name="Hrvatski (Croatian)"),
@@ -53,6 +56,7 @@ LOCALE_DATA: dict[str, LocaleData] = {
     "pt-PT": LocaleData(name="Português (Portuguese)"),
     "ro-RO": LocaleData(name="Română (Romanian)"),
     "ru-RU": LocaleData(name="Pусский (Russian)"),
+    "sk-SK": LocaleData(name="Slovenčina (Slovak)"),
     "sl-SI": LocaleData(name="Slovenščina (Slovenian)"),
     "sr-SP": LocaleData(name="српски (Serbian)"),
     "sv-SE": LocaleData(name="Svenska (Swedish)"),
@@ -93,8 +97,8 @@ class CrowdinApi:
     project_id = "451976"
     api_key = API_KEY
 
-    def __init__(self, api_key: str):
-        api_key = api_key
+    def __init__(self, api_key: str | None):
+        self.api_key = api_key if api_key else API_KEY  # None or "" falls back to the module-level key
 
     @property
     def headers(self) -> dict:
@@ -196,7 +200,7 @@ def inject_registration_validation_values():
 
 
 def generate_locales_ts_file():
-    api = CrowdinApi("")
+    api = CrowdinApi(None)
     models = api.get_languages()
     tmpl = Template(LOCALE_TEMPLATE)
     rendered = tmpl.render(locales=models)
diff --git a/dev/code-generation/main.py b/dev/code-generation/main.py
index 2fd542e7b..b2d0f2264 100644
--- a/dev/code-generation/main.py
+++ b/dev/code-generation/main.py
@@ -1,3 +1,4 @@
+import argparse
 from pathlib import Path
 
 import gen_py_pytest_data_paths
@@ -11,15 +12,39 @@ CWD = Path(__file__).parent
 
 
 def main():
-    items = [
-        (gen_py_schema_exports.main, "schema exports"),
-        (gen_ts_types.main, "frontend types"),
-        (gen_ts_locales.main, "locales"),
-        (gen_py_pytest_data_paths.main, "test data paths"),
-        (gen_py_pytest_routes.main, "pytest routes"),
-    ]
+    # Positional names select individual generators; none given runs them all
+    parser = argparse.ArgumentParser(description="Run code generators")
+    generators_help = (
+        "Specific generators to run (schema, types, locales, data-paths, routes). If none specified, all will run."  # noqa: E501 - long line
+    )
+    parser.add_argument("generators", nargs="*", help=generators_help)
+    args = parser.parse_args()
 
-    for func, name in items:
+    # Registry of generators: CLI name -> (entry point, label used in the logs)
+    all_generators = {
+        "schema": (gen_py_schema_exports.main, "schema exports"),
+        "types": (gen_ts_types.main, "frontend types"),
+        "locales": (gen_ts_locales.main, "locales"),
+        "data-paths": (gen_py_pytest_data_paths.main, "test data paths"),
+        "routes": (gen_py_pytest_routes.main, "pytest routes"),
+    }
+
+    # Reject unknown names before anything runs
+    invalid_generators = [g for g in args.generators if g not in all_generators]
+    if invalid_generators:
+        log.error(f"Invalid generator(s): {', '.join(invalid_generators)}")
+        log.info(f"Available generators: {', '.join(all_generators.keys())}")
+        # Exit non-zero so CI and task runners see the failure; a bare return exits 0
+        raise SystemExit(1)
+
+    if args.generators:
+        generators_to_run = [all_generators[g] for g in args.generators]
+    else:
+        # Run all generators (default behavior)
+        generators_to_run = list(all_generators.values())
+
+    # Run the selected generators
+    for func, name in generators_to_run:
         log.info(f"Generating {name}...")
         func()
 
diff --git a/dev/code-generation/utils/template.py b/dev/code-generation/utils/template.py
index 6312426e2..32ecf9c47 100644
--- a/dev/code-generation/utils/template.py
+++ b/dev/code-generation/utils/template.py
@@ -1,5 +1,4 @@
 import logging
-import re
 import subprocess
 from dataclasses import dataclass
 from pathlib import Path
@@ -35,7 +34,7 @@ class CodeSlicer:
     start: int
     end: int
 
-    indentation: str
+    indentation: str | None
     text: list[str]
 
     _next_line = None
@@ -47,15 +46,24 @@ class CodeSlicer:
 
     def push_line(self, string: str) -> None:
         self._next_line = self._next_line or self.start + 1
-        self.text.insert(self._next_line, self.indentation + string + "\n")
+        self.text.insert(self._next_line, f"{self.indentation or ''}{string}\n")
         self._next_line += 1
 
 
-def get_indentation_of_string(line: str, comment_char: str = "//|#") -> str:
-    return re.sub(rf"{comment_char}.*", "", line).removesuffix("\n")
+def get_indentation_of_string(line: str) -> str:
+    """Return the leading whitespace of ``line``.
+
+    Only whitespace before the first non-blank character counts, so a ``#``
+    comment that contains ``//`` (or code preceding a comment) can no longer
+    leak interior spaces, and a trailing newline is never included.
+    """
+    # Drop the line terminator first so blank lines do not yield "\n".
+    body = line.rstrip("\r\n")
+    # Whatever lstrip() removes from the front is the indentation.
+    return body[: len(body) - len(body.lstrip())]
 
 
-def find_start_end(file_text: list[str], gen_id: str) -> tuple[int, int, str]:
+def find_start_end(file_text: list[str], gen_id: str) -> tuple[int, int, str | None]:
     start = None
     end = None
     indentation = None