"""
alloggiati/normalizers.py
=========================
Reusable normalization helpers for the Alloggiati Web transformer.

Design principles
-----------------
- PURE: no DB writes, no API calls, no logging of sensitive data.
- SAFE: every function returns a defined value on any input — never raises.
- STRICT: fallback on unresolvable input is "" (empty string), NOT the raw
  value.  Leaking human-readable strings like "United States" or "passport"
  into an Alloggiati payload would cause silent rejection by the portal.
- EFFICIENT: DB lookups happen at most once per process via lru_cache.
  transform_booking_guests() may process hundreds of guests; repeated queries
  for the same country/document value must never occur.

Reuse policy
------------
- Country normalization delegates entirely to the existing
  services.country_snapshot.resolve_istat_country_code(), which resolves
  against the in-memory country snapshot (no lru_cache layer is added in
  this module).  No duplicate DB queries, no duplicate country mapping
  tables.
- Document type normalization queries IstatDocumentType once and caches the
  result map for the lifetime of the process.
- Gender normalization is stateless — no DB access needed.

No ISTAT models are imported at module level.  All model imports are deferred
inside cached loader functions to avoid circular-import issues at Django
startup.

Thread safety
-------------
lru_cache is thread-safe for reads in CPython (GIL protects the dict).
The cached maps are read-only after construction, so concurrent access from
multiple threads is safe.  clear_normalizer_caches() should only be called
from single-threaded test teardown or management commands.
"""

from __future__ import annotations

from functools import lru_cache
from typing import Any, Optional


# ---------------------------------------------------------------------------
# Small alias map for document type inputs that don't match IstatDocumentType
# descriptions directly.
#
# Keep this list minimal — only add entries that are genuinely unavoidable
# (common user-facing labels that differ from the official DB descriptions).
# Do NOT add full country lists or large lookup tables here.
# ---------------------------------------------------------------------------

_DOCUMENT_TYPE_ALIASES: dict[str, str] = {
    # user-facing label              → canonical DB description (lowercase)
    #
    # Keys are looked up after strip() + lower(), so every key must already
    # be lowercase and free of surrounding whitespace.
    #
    # Italian labels are listed both with and without the grave accent: the
    # correct spelling is "identità", but accent-less input is also common.
    "id card":                       "identity card",
    "id_card":                       "identity card",
    "national id":                   "identity card",
    "national id card":              "identity card",
    "carta identita":                "identity card",
    "carta identità":                "identity card",
    "carta d'identita":              "identity card",
    "carta d'identità":              "identity card",
    "drivers license":               "driving licence",
    "drivers_license":               "driving licence",
    "driver license":                "driving licence",
    "driver's license":              "driving licence",
    "driving license":               "driving licence",
    "patente":                       "driving licence",
    "passaporto":                    "passport",
}


# ---------------------------------------------------------------------------
# Internal helpers
# ---------------------------------------------------------------------------

def _safe_str(value: Any) -> str:
    """
    Coerce an arbitrary field value into whitespace-trimmed text.

    ``None`` and booleans collapse to the empty string; every other value is
    passed through ``str()`` and then stripped.  Booleans are singled out
    because ``bool`` is a subclass of ``int``: without the explicit check,
    ``True`` would be rendered as the text "True".

    Args:
        value: Any raw field value.

    Returns:
        The stripped text form of *value*, or "" for None/bool/blank input.
    """
    unusable = value is None or isinstance(value, bool)
    return "" if unusable else str(value).strip()


def _normalize_key(value: str) -> str:
    """Return *value* trimmed and lowercased, for case-insensitive dict keys."""
    trimmed = value.strip()
    return trimmed.lower()


@lru_cache(maxsize=1)
def _load_document_type_map() -> dict[str, str]:
    """
    Read IstatDocumentType once and index it for case-insensitive lookup.

    Each row with a non-blank code contributes up to two keys, both pointing
    at the row's (stripped) code:
      - the normalised description  e.g. "passport" → "PASS"
      - the normalised code itself  e.g. "pass"     → "PASS"

    Rows whose code is empty or blank are ignored.  If two rows normalise to
    the same key, the row iterated later overwrites the earlier one.

    The result is memoised by lru_cache, so the query runs on the first call
    (not at import time) and again only after cache_clear().
    """
    # Deferred import — keeps the ISTAT models out of module import time and
    # avoids circular imports.
    from istat.models import IstatDocumentType

    lookup: dict[str, str] = {}
    rows = IstatDocumentType.objects.only("code", "description")
    for row in rows:
        official_code = (row.code or "").strip()
        label = (row.description or "").strip()
        if official_code:
            if label:
                lookup[_normalize_key(label)] = official_code
            # The code key is written last so that an already-coded input
            # always maps back to this row's own code.
            lookup[_normalize_key(official_code)] = official_code

    return lookup


# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------

def normalize_gender(value: Any) -> str:
    """
    Map a raw gender value onto the codes "M", "F" or "U".

    Recognised inputs (text is matched case-insensitively after trimming):
      "male", "m", "maschio", "1", int 1   → "M"
      "female", "f", "femmina", "2", int 2 → "F"
      everything else, None, any bool      → "U"

    Args:
        value: Raw gender value from Guest.gender or extra_data.

    Returns:
        "M", "F", or "U" — always a non-empty string.
    """
    # bool is a subclass of int, so it has to be ruled out before the int
    # branch; otherwise True would be read as 1 → "M".
    if isinstance(value, bool):
        return "U"

    if isinstance(value, int):
        return "M" if value == 1 else ("F" if value == 2 else "U")

    code_by_token = {
        "male": "M",
        "m": "M",
        "maschio": "M",
        "1": "M",
        "female": "F",
        "f": "F",
        "femmina": "F",
        "2": "F",
    }
    # A blank/None value becomes "" here, which is not a known token → "U".
    token = _safe_str(value).lower()
    return code_by_token.get(token, "U")


def normalize_country_code(value: Optional[str]) -> str:
    """
    Convert any country representation to IstatCountry.code (canonical ISO3).

    Delegates to services.country_snapshot.resolve_istat_country_code(),
    which is documented as an O(1) lookup against an immutable in-memory
    snapshot loaded at Django startup (no ORM access at call time).  For that
    reason no lru_cache is applied here: an extra caching layer would add
    memory overhead without a performance benefit.

    Resolution order (as documented for the snapshot resolver):
      1. Empty / None                → ""
      2. Exact match on code (ISO3)  → return code  ("USA" → "USA")
      3. Match on iso_code (ISO2)    → return code  ("US"  → "USA")
      4. Match on country name       → return code  ("Italy" → "ITA")
      5. Unresolvable                → ""

    Returning "" on failure is intentional: the transformer's post-normalization
    validation will produce "invalid_nationality_code" rather than silently
    sending a human-readable string like "United States" to the portal.

    Non-string input is tolerated: falsy values and booleans yield "", and
    any other object is converted with str() before resolution instead of
    failing on a missing ``.strip()``.

    Args:
        value: Any country representation — ISO2 ("IT"), ISO3/canonical ("ITA"),
               or full name ("Italy").

    Returns:
        IstatCountry.code string if resolved, otherwise "".  Never None.

    Raises:
        Nothing from this function's own logic.  Errors raised by the
        snapshot resolver (documented: SnapshotNotLoadedError when startup
        was incomplete) are deliberately left to propagate — fail-fast.
    """
    if not value:
        return ""

    # Coerce through the shared helper rather than calling value.strip()
    # directly, so a truthy non-str value cannot raise AttributeError.
    stripped = _safe_str(value)
    if not stripped:
        return ""

    # Deferred import, consistent with the module's no-import-at-load policy.
    from services.country_snapshot import resolve_istat_country_code as _snap_resolve

    return _snap_resolve(stripped)


@lru_cache(maxsize=256)
def normalize_document_type(value: Optional[str]) -> str:
    """
    Map a user-friendly document type string to the official ISTAT code.

    Resolution order:
      1. Empty / None / blank input                        → ""
      2. Match against IstatDocumentType code or description
         (case-insensitive, via _load_document_type_map)   → return code
      3. Match via _DOCUMENT_TYPE_ALIASES preprocessing    → return code
      4. Unresolvable                                      → ""

    Returning "" on failure is intentional: the transformer's post-normalization
    validation will produce "invalid_document_type" rather than sending a raw
    string like "random document" to the Alloggiati portal.

    Caching: results — including "" for unresolvable input — are memoised per
    distinct argument by lru_cache(maxsize=256), on top of the cached
    IstatDocumentType map.  clear_normalizer_caches() resets both.

    Non-string input is tolerated: falsy values and booleans yield "", and
    any other hashable object is converted with str() before lookup instead
    of failing on a missing ``.strip()``.  Unhashable arguments (list, dict,
    …) are still rejected with TypeError by lru_cache itself, before this
    body runs.

    Args:
        value: Raw document type (e.g. "passport", "PASS", "id card").

    Returns:
        Official ISTAT document type code if resolved, otherwise "".
        Never None.
    """
    if not value:
        return ""

    # Coerce through the shared helper rather than calling value.strip()
    # directly, so a truthy non-str value cannot raise AttributeError.
    key = _normalize_key(_safe_str(value))
    if not key:
        return ""

    doc_map = _load_document_type_map()

    # Direct match against code or description.
    if key in doc_map:
        return doc_map[key]

    # Try alias preprocessing for common user-facing labels.
    aliased_key = _DOCUMENT_TYPE_ALIASES.get(key)
    if aliased_key and aliased_key in doc_map:
        return doc_map[aliased_key]

    # Strict fallback: return "" so the caller can detect normalization failure.
    return ""


def clear_normalizer_caches() -> None:
    """
    Drop all cached normalization state held by this module.

    Clears the memoised IstatDocumentType map and the per-value results of
    normalize_document_type(), then resets the country snapshot through
    services.country_snapshot.reset_country_snapshot().

    Intended for tests: call it after installing mock data so the next
    normalization call reloads from scratch.  Should only be called from
    single-threaded test setup/teardown.

    normalize_country_code() carries no lru_cache of its own, so for
    countries only the snapshot needs resetting.
    """
    # Map first, then the per-value results derived from it.
    for cached_fn in (_load_document_type_map, normalize_document_type):
        cached_fn.cache_clear()

    # Deferred import; resetting lets mock IstatCountry data be picked up.
    from services.country_snapshot import reset_country_snapshot as _reset_snapshot

    _reset_snapshot()