"""
services/country_snapshot.py
=============================
Immutable in-memory country lookup snapshot for the Alloggiati transformer.

Purpose
-------
Eliminate all runtime ORM access from the country normalization path.
The snapshot is loaded ONCE at Django startup (via ServicesConfig.ready())
and then provides O(1) pure-dict lookups for the lifetime of the process.

Design guarantees
-----------------
1. FAIL-FAST: load_country_snapshot() raises RuntimeError if the loaded
   dataset is empty or structurally invalid.  The process will not start
   in a partially-initialized state when the DB is reachable but broken.

2. NO LAZY LOADING: resolve_istat_country_code() does NOT trigger a DB
   load.  If the snapshot was not loaded at startup, it raises
   SnapshotNotLoadedError immediately.  There is no silent fallback.

3. IMMUTABLE AFTER LOAD: all public maps are wrapped in MappingProxyType
   after construction.  Any attempt to mutate them raises TypeError.

4. DETERMINISTIC: same input → same output across all workers and
   processes.  No hidden state, no per-request caching layers.

5. MULTI-WORKER SAFE:
   - gunicorn pre-fork: snapshot loaded in master, inherited by workers.
   - gunicorn post-fork / gevent: each worker calls ready() independently.
   - Kubernetes replicas: each pod loads independently from the same DB.
   - No shared memory is assumed across nodes or processes.

6. ATOMIC VISIBILITY: the snapshot is either fully visible or not visible
   at all.  The load sequence is:
     a. Build all five dicts as local variables (no globals touched).
     b. Run _validate_snapshot()           — individual map checks.
     c. Run _validate_snapshot_consistency() — cross-map integrity gate.
     d. Wrap all five dicts in MappingProxyType as local variables.
     e. Assign all five globals + set _snapshot_loaded = True in one
        uninterrupted block with no code between the first assignment
        and the flag.
   If any step a–d raises, no global is ever modified.  The flag is set
   last, so a consumer that checks _snapshot_loaded first (as
   resolve_istat_country_code() does) never observes ISO2_TO_CODE
   populated while NAME_TO_CODE is empty, or any other partial combination.

IstatCountry schema (production)
---------------------------------
  code     — primary key VARCHAR(9), ISO3 alpha (e.g. "USA", "ITA")
  iso_code — VARCHAR(3), ISO2 (e.g. "US", "IT") or ISO3 when no ISO2
  name     — display name (e.g. "United States", "Italy")

Public maps (read-only MappingProxyType after load)
----------------------------------------------------
  ISO2_TO_CODE  : "US"    → "USA"   (iso_code upper → code)
  ISO3_TO_CODE  : "USA"   → "USA"   (code upper → code, canonical pass-through)
  NAME_TO_CODE  : "italy" → "ITA"   (name lower → code)
  CODE_TO_ISO2  : "USA"   → "US"    (code upper → iso_code, backward compat)
  NAME_TO_ISO2  : "italy" → "IT"    (name lower → iso_code, backward compat)

Failure constant
----------------
  COUNTRY_NOT_FOUND = ""   — returned by resolve_istat_country_code() when
                             no match is found.  Never None, never raw input.
"""

from __future__ import annotations

import threading
from types import MappingProxyType
from typing import Optional

# ---------------------------------------------------------------------------
# Failure constant — explicit, consistent, never None
#
# Returned by resolve_istat_country_code() for empty input and for input
# that matches none of the lookup maps.  It is the empty string, so callers
# can test the result with plain truthiness.
# ---------------------------------------------------------------------------

COUNTRY_NOT_FOUND: str = ""

# ---------------------------------------------------------------------------
# Manual aliases — informal names → ISO2 intermediate
# Kept minimal: only entries that cannot be derived from the DB name column.
#
# Keys must be lower-case: they are merged into the name maps, which are
# queried with the lower-cased input.  Values are ISO2 codes; at load time
# each one is translated to the canonical code through the ISO2 map, and an
# alias whose ISO2 code is not present in the loaded data is skipped.
# Aliases never override a name that came from the database (setdefault).
# ---------------------------------------------------------------------------

_MANUAL_ALIASES: dict[str, str] = {
    # United States variants
    "u.s.":                     "US",
    "u.s.a.":                   "US",
    "usa":                      "US",
    "united states":            "US",
    "united states of america": "US",
    # United Kingdom variants
    "uk":                       "GB",
    "u.k.":                     "GB",
    "great britain":            "GB",
    "england":                  "GB",
    # UAE variants
    "uae":                      "AE",
    "u.a.e.":                   "AE",
    "emirates":                 "AE",
    # Russia — ISTAT official name is "Russian Federation"
    "russia":                   "RU",
    # Other common short-form names that differ from ISTAT official names
    "south korea":              "KR",
    "north korea":              "KP",
    "iran":                     "IR",
    "syria":                    "SY",
    "bolivia":                  "BO",
    "tanzania":                 "TZ",
    "moldova":                  "MD",
    "congo":                    "CG",
    "dr congo":                 "CD",
    "democratic republic of the congo": "CD",
    "ivory coast":              "CI",
    "cape verde":               "CV",
    "eswatini":                 "SZ",
    "swaziland":                "SZ",
    "taiwan":                   "TW",
    "palestine":                "PS",
    "brunei":                   "BN",
    "laos":                     "LA",
    "vietnam":                  "VN",
    "timor-leste":              "TL",
    "east timor":               "TL",
    "micronesia":               "FM",
}

# ---------------------------------------------------------------------------
# Module-level snapshot — every map is a MappingProxyType at all times: an
# empty placeholder before load, rebound to a populated proxy by
# load_country_snapshot() and back to an empty one by reset_country_snapshot().
#
# The names are REBOUND, not mutated in place.  Read them as attributes of
# this module; a `from ... import ISO2_TO_CODE` executed before the load
# keeps referring to the empty placeholder object.
# ---------------------------------------------------------------------------

ISO2_TO_CODE: MappingProxyType = MappingProxyType({})
ISO3_TO_CODE: MappingProxyType = MappingProxyType({})
NAME_TO_CODE: MappingProxyType = MappingProxyType({})
CODE_TO_ISO2: MappingProxyType = MappingProxyType({})
NAME_TO_ISO2: MappingProxyType = MappingProxyType({})

# True only after all five maps above have been published by the loader.
_snapshot_loaded: bool = False
# Held by load_country_snapshot() and reset_country_snapshot() while they
# rebind the globals above; plain lookups do not take it.
_load_lock = threading.Lock()


# ---------------------------------------------------------------------------
# Custom exception
# ---------------------------------------------------------------------------

class SnapshotNotLoadedError(RuntimeError):
    """
    Raised when resolve_istat_country_code() is called with a non-empty
    value before the snapshot has been loaded.

    This indicates a startup sequencing bug — ServicesConfig.ready() must
    run before any request is processed.

    Empty, None and whitespace-only inputs are answered before the loaded
    flag is consulted, so they never trigger this error.  Being a
    RuntimeError subclass, it is also caught by ``except RuntimeError``.
    """


# ---------------------------------------------------------------------------
# Internal: validation
# ---------------------------------------------------------------------------

def _validate_snapshot(
    iso2_map: dict[str, str],
    iso3_map: dict[str, str],
    name_map: dict[str, str],
) -> None:
    """
    Validate that the loaded snapshot contains usable data.

    Checks:
      - ISO2_TO_CODE is non-empty
      - ISO3_TO_CODE is non-empty
      - NAME_TO_CODE is non-empty
      - All values in ISO3_TO_CODE are non-empty strings

    Raises:
        RuntimeError: with a descriptive message if any check fails.
            This is intentional — an empty or corrupt reference dataset
            is a fatal misconfiguration, not a recoverable error.
    """
    if not iso2_map:
        raise RuntimeError(
            "Country snapshot initialization failed: ISO2_TO_CODE is empty. "
            "The istat_countries table may be empty or all rows lack iso_code."
        )
    if not iso3_map:
        raise RuntimeError(
            "Country snapshot initialization failed: ISO3_TO_CODE is empty. "
            "The istat_countries table may be empty or all rows lack a code."
        )
    if not name_map:
        raise RuntimeError(
            "Country snapshot initialization failed: NAME_TO_CODE is empty. "
            "The istat_countries table may be empty or all rows lack a name."
        )

    # Ensure no empty-string values slipped through
    bad_iso3 = [k for k, v in iso3_map.items() if not v]
    if bad_iso3:
        raise RuntimeError(
            f"Country snapshot initialization failed: {len(bad_iso3)} ISO3 "
            f"entries have empty canonical codes: {bad_iso3[:5]}"
        )


def _validate_snapshot_consistency(
    iso2_map: dict[str, str],
    iso3_map: dict[str, str],
    name_map: dict[str, str],
    rev_map:  dict[str, str],
    name_iso2: dict[str, str],
) -> None:
    """
    Atomic readiness gate — cross-map consistency check run immediately
    before the global snapshot state is published.

    This is the final barrier that prevents a partially-constructed or
    internally-inconsistent snapshot from ever becoming visible to callers.
    It runs AFTER _validate_snapshot() (which checks individual map
    emptiness) and BEFORE any global variable is assigned.

    Rules enforced
    --------------
    1. Every ISO2 key in iso2_map resolves to a code that exists in iso3_map.
       Broken: iso2_map["US"] = "USA" but "USA" not in iso3_map.

    2. Every code key in rev_map (CODE_TO_ISO2) exists in iso3_map.
       Broken: rev_map["USA"] = "US" but "USA" not in iso3_map.

    3. Every ISO2 value in rev_map exists as a key in iso2_map.
       Broken: rev_map["USA"] = "US" but "US" not in iso2_map.

    4. Every code value in name_map exists in iso3_map.
       Broken: name_map["italy"] = "ITA" but "ITA" not in iso3_map.

    5. Every key in name_iso2 also exists in name_map.
       Broken: name_iso2["italy"] = "IT" but "italy" not in name_map.

    All five rules must pass simultaneously.  If any fails, a RuntimeError
    is raised with a precise description of the inconsistency.  The global
    snapshot state is NOT modified — the system remains in its previous
    clean state (either fully loaded from a prior call, or unloaded).

    Args:
        iso2_map:  ISO2 upper → canonical ISO3 code
        iso3_map:  ISO3 upper → canonical ISO3 code
        name_map:  name lower → canonical ISO3 code
        rev_map:   ISO3 upper → ISO2 (reverse of iso2_map)
        name_iso2: name lower → ISO2

    Raises:
        RuntimeError: if any cross-map consistency rule is violated.
    """
    # Rule 1: every ISO2 → code target must exist in ISO3 map
    orphaned_iso2 = [
        f"{k!r} → {v!r}"
        for k, v in iso2_map.items()
        if v.upper() not in iso3_map
    ]
    if orphaned_iso2:
        raise RuntimeError(
            "Country snapshot atomic readiness check failed: "
            f"{len(orphaned_iso2)} ISO2 entries point to codes absent from "
            f"ISO3_TO_CODE: {orphaned_iso2[:3]}"
        )

    # Rule 2: every CODE_TO_ISO2 key must exist in ISO3 map
    orphaned_rev_keys = [
        k for k in rev_map if k not in iso3_map
    ]
    if orphaned_rev_keys:
        raise RuntimeError(
            "Country snapshot atomic readiness check failed: "
            f"{len(orphaned_rev_keys)} CODE_TO_ISO2 keys absent from "
            f"ISO3_TO_CODE: {orphaned_rev_keys[:3]}"
        )

    # Rule 3: every CODE_TO_ISO2 value (ISO2) must exist in ISO2 map
    orphaned_rev_vals = [
        f"{k!r} → {v!r}"
        for k, v in rev_map.items()
        if v.upper() not in iso2_map
    ]
    if orphaned_rev_vals:
        raise RuntimeError(
            "Country snapshot atomic readiness check failed: "
            f"{len(orphaned_rev_vals)} CODE_TO_ISO2 values absent from "
            f"ISO2_TO_CODE: {orphaned_rev_vals[:3]}"
        )

    # Rule 4: every name → code target must exist in ISO3 map
    orphaned_names = [
        f"{k!r} → {v!r}"
        for k, v in name_map.items()
        if v.upper() not in iso3_map
    ]
    if orphaned_names:
        raise RuntimeError(
            "Country snapshot atomic readiness check failed: "
            f"{len(orphaned_names)} NAME_TO_CODE entries point to codes absent "
            f"from ISO3_TO_CODE: {orphaned_names[:3]}"
        )

    # Rule 5: every name_iso2 key must also appear in name_map
    orphaned_name_iso2 = [
        k for k in name_iso2 if k not in name_map
    ]
    if orphaned_name_iso2:
        raise RuntimeError(
            "Country snapshot atomic readiness check failed: "
            f"{len(orphaned_name_iso2)} NAME_TO_ISO2 keys absent from "
            f"NAME_TO_CODE: {orphaned_name_iso2[:3]}"
        )


# ---------------------------------------------------------------------------
# Loader
# ---------------------------------------------------------------------------

def load_country_snapshot() -> None:
    """
    Load all IstatCountry rows into the immutable in-memory snapshot.

    Idempotent — safe to call multiple times; only the first successful call
    performs the DB query.  Subsequent calls return immediately.  A call
    that raises leaves the module globals untouched, so a later call will
    try again.

    Rows are read ordered by ``code``.  Several maps are filled with
    first-wins ``setdefault``; without an explicit ordering the database may
    return rows in any order, so two rows sharing an ``iso_code`` or a name
    could resolve differently from one process to the next.

    Intended to be called at application startup so the snapshot is
    available before any lookup is made.

    Raises:
        RuntimeError: if the loaded dataset is empty or structurally invalid
            (see _validate_snapshot / _validate_snapshot_consistency).
        Exception: any error raised by the ORM query is not caught here and
            propagates to the caller (presumably ServicesConfig.ready() —
            verify how it handles migration-state DB errors).
    """
    global _snapshot_loaded, ISO2_TO_CODE, ISO3_TO_CODE, NAME_TO_CODE
    global CODE_TO_ISO2, NAME_TO_ISO2

    # Fast path — already loaded (no lock needed for read).
    if _snapshot_loaded:
        return

    with _load_lock:
        # Double-checked locking — re-test inside the lock.
        if _snapshot_loaded:
            return

        from istat.models import IstatCountry  # deferred — avoids circular imports

        iso2_map:  dict[str, str] = {}
        iso3_map:  dict[str, str] = {}
        name_map:  dict[str, str] = {}
        rev_map:   dict[str, str] = {}
        name_iso2: dict[str, str] = {}

        # Explicit ordering makes every first-wins setdefault() below pick
        # the same row regardless of how the database happens to return rows.
        rows = IstatCountry.objects.only("code", "iso_code", "name").order_by("code")

        for row in rows:
            code = (row.code     or "").strip()
            iso  = (row.iso_code or "").strip().upper()
            name = (row.name     or "").strip()

            # A row without a canonical code cannot be a lookup target.
            if not code:
                continue

            code_upper = code.upper()

            # Canonical pass-through: ISO3 → ISO3.  Plain assignment (not
            # setdefault) so a real code always beats an iso_code alias
            # registered earlier under the same key.
            iso3_map[code_upper] = code

            if len(iso) == 2 and iso.isalpha():
                # ISO2 → canonical
                iso2_map.setdefault(iso, code)
                # canonical → ISO2 (reverse, for backward compat)
                rev_map.setdefault(code_upper, iso)
                # name → ISO2 (for normalize_country backward compat)
                if name:
                    name_iso2.setdefault(name.lower(), iso)
            elif len(iso) == 3 and iso.isalpha():
                # iso_code is itself ISO3 — alias to canonical
                iso3_map.setdefault(iso, code)

            # Name → canonical (case-insensitive)
            if name:
                name_map.setdefault(name.lower(), code)

        # Apply manual aliases: alias → ISO2 → canonical.  Aliases whose
        # ISO2 code is absent from the loaded data are skipped, and names
        # that came from the database are never overridden.
        for alias, iso2 in _MANUAL_ALIASES.items():
            iso2_upper = iso2.upper()
            if iso2_upper in iso2_map:
                name_map.setdefault(alias, iso2_map[iso2_upper])
                # Store the normalised form, matching the row-derived values.
                name_iso2.setdefault(alias, iso2_upper)

        # ── Stage 1: individual map validation ──────────────────────────────
        # Raises RuntimeError if any map is empty or contains empty values.
        _validate_snapshot(iso2_map, iso3_map, name_map)

        # ── Stage 2: cross-map consistency gate ─────────────────────────────
        # Raises RuntimeError if any inter-map reference is broken.
        # No global state has been modified yet at this point.
        _validate_snapshot_consistency(iso2_map, iso3_map, name_map, rev_map, name_iso2)

        # ── Stage 3: build all proxies as locals ─────────────────────────────
        # All five MappingProxyType objects are constructed here, before any
        # global is touched.  If MappingProxyType() itself raises (should never
        # happen, but defensive), globals remain in their previous clean state.
        _new_iso2  = MappingProxyType(iso2_map)
        _new_iso3  = MappingProxyType(iso3_map)
        _new_name  = MappingProxyType(name_map)
        _new_rev   = MappingProxyType(rev_map)
        _new_niso2 = MappingProxyType(name_iso2)

        # ── Stage 4: publish ─────────────────────────────────────────────────
        # All five globals are assigned and _snapshot_loaded is set True in a
        # single uninterrupted block.  No validation, no function calls, no
        # branches between the first assignment and the flag.  The flag is
        # set LAST so that readers who test it first only ever see the full
        # set of maps.  This is the only place _snapshot_loaded transitions
        # from False → True.
        ISO2_TO_CODE     = _new_iso2
        ISO3_TO_CODE     = _new_iso3
        NAME_TO_CODE     = _new_name
        CODE_TO_ISO2     = _new_rev
        NAME_TO_ISO2     = _new_niso2
        _snapshot_loaded = True


def reset_country_snapshot() -> None:
    """
    Return the module to its unloaded state.

    Every public map is rebound to a fresh empty MappingProxyType and the
    loaded flag is cleared, all while holding the load lock.

    Intended ONLY for tests that need to swap in fresh mock data between
    test cases.  Never call this in production code.
    """
    global ISO2_TO_CODE, ISO3_TO_CODE, NAME_TO_CODE, CODE_TO_ISO2, NAME_TO_ISO2
    global _snapshot_loaded

    with _load_lock:
        # Five distinct empty proxies, one per public map.
        (
            ISO2_TO_CODE,
            ISO3_TO_CODE,
            NAME_TO_CODE,
            CODE_TO_ISO2,
            NAME_TO_ISO2,
        ) = (MappingProxyType({}) for _ in range(5))
        _snapshot_loaded = False


# ---------------------------------------------------------------------------
# Pure lookup — zero ORM, zero DB, no lazy loading
# ---------------------------------------------------------------------------

def resolve_istat_country_code(value: Optional[str]) -> str:
    """
    Map any country representation to IstatCountry.code (canonical ISO3).

    Works purely on the in-memory snapshot maps: no ORM, no DB access, and
    no lazy loading.  If the snapshot has not been loaded and the input is
    non-empty, SnapshotNotLoadedError is raised instead of falling back.

    Resolution order (first hit wins):
      1. Empty / None / whitespace-only  → COUNTRY_NOT_FOUND ("")
      2. Upper-cased input in ISO3_TO_CODE → its code   ("USA" → "USA")
      3. Upper-cased input in ISO2_TO_CODE → its code   ("US"  → "USA")
      4. Lower-cased input in NAME_TO_CODE → its code   ("Italy" → "ITA")
      5. No match                          → COUNTRY_NOT_FOUND ("")

    Args:
        value: Any country representation; surrounding whitespace is ignored.

    Returns:
        The resolved code, otherwise COUNTRY_NOT_FOUND.  Never None.

    Raises:
        SnapshotNotLoadedError: non-empty input while the snapshot is not
            loaded (a startup sequencing problem).
    """
    # Blank input is answered before the loaded flag is consulted.
    candidate = value.strip() if value else ""
    if not candidate:
        return COUNTRY_NOT_FOUND

    # Fail-fast: this is a startup contract violation, not a trigger for a
    # lazy load.
    if not _snapshot_loaded:
        raise SnapshotNotLoadedError(
            "Country snapshot has not been loaded. "
            "ServicesConfig.ready() must run before resolve_istat_country_code() "
            "is called. Check your Django AppConfig and INSTALLED_APPS order."
        )

    code_key = candidate.upper()
    name_key = candidate.lower()

    # Tables are probed in priority order: canonical code, ISO2, then name.
    lookup_chain = (
        (ISO3_TO_CODE, code_key),
        (ISO2_TO_CODE, code_key),
        (NAME_TO_CODE, name_key),
    )
    for table, key in lookup_chain:
        if key in table:
            return table[key]

    return COUNTRY_NOT_FOUND
