"""
alloggiati/test_batch_transform.py
===================================
Batch validation test for the Alloggiati transformation pipeline.

Verifies that ISO2 → ISO3 country normalization works correctly through the
full to_alloggiati_guest() pipeline across 30 guests with 5 nationalities
and mixed input formats (ISO2 upper/lower, ISO3, full name, aliases).

Run with:
    python backend/alloggiati/test_batch_transform.py
"""

from __future__ import annotations

import datetime
import logging
import sys
import types
import uuid
from collections import defaultdict
from pathlib import Path

# ---------------------------------------------------------------------------
# Path setup
# ---------------------------------------------------------------------------
# Derive the backend root from this file's own location instead of the
# current working directory, so the project imports below resolve no matter
# where the script is launched from.  Per the module docstring this file
# lives at <backend>/alloggiati/test_batch_transform.py, hence two levels up.
_BACKEND_DIR = str(Path(__file__).resolve().parent.parent)
sys.path.insert(0, _BACKEND_DIR)

# ---------------------------------------------------------------------------
# Logging — captures normalization steps as required by spec
# ---------------------------------------------------------------------------
# Root logging goes to stdout at INFO so the normalization messages appear
# inline with the printed report.
_LOG_FORMAT = "%(levelname)s  %(message)s"
logging.basicConfig(stream=sys.stdout, format=_LOG_FORMAT, level=logging.INFO)

# Logger used by run() for the per-guest "nationality resolved" lines.
log = logging.getLogger("alloggiati.test")

# ---------------------------------------------------------------------------
# Mock IstatCountry — mirrors production schema exactly:
#   code     = ISO3 canonical (primary key, VARCHAR(9))
#   iso_code = ISO2 (VARCHAR(3))
#   name     = display name
# ---------------------------------------------------------------------------

class _FakeCountry:
    """In-memory stand-in for a single country row.

    Keeps the three constructor arguments as plain attributes:
    ``code``, ``name`` and ``iso_code``.
    """

    def __init__(self, code: str, name: str, iso_code: str) -> None:
        self.code, self.name, self.iso_code = code, name, iso_code


# Each row is (code, display name, iso_code), i.e. the positional order of
# _FakeCountry's constructor.
_FAKE_COUNTRIES = [
    _FakeCountry(*row)
    for row in (
        ("USA", "United States", "US"),
        ("ITA", "Italy", "IT"),
        ("IND", "India", "IN"),
        ("FRA", "France", "FR"),
        ("DEU", "Germany", "DE"),
        ("GBR", "United Kingdom", "GB"),
        ("ESP", "Spain", "ES"),
        ("CHN", "China", "CN"),
        ("BRA", "Brazil", "BR"),
        ("AUS", "Australia", "AU"),
    )
]


class _FakeCountryMgr:
    """Manager stub installed as ``IstatCountry.objects``; serves _FAKE_COUNTRIES."""

    def only(self, *_fields):
        """Ignore the requested fields and return this manager for chaining."""
        return self

    def order_by(self, *_ordering):
        """Ignore the ordering arguments and return the fake rows in list order."""
        return _FAKE_COUNTRIES

    def __iter__(self):
        """Iterate over the fake rows directly."""
        return iter(_FAKE_COUNTRIES)


# ---------------------------------------------------------------------------
# Mock IstatDocumentType
# ---------------------------------------------------------------------------

class _FakeDoc:
    """In-memory stand-in for a single document-type row (``code``, ``description``)."""

    def __init__(self, code: str, description: str) -> None:
        self.code, self.description = code, description


class _FakeDocMgr:
    """Manager stub installed as ``IstatDocumentType.objects``."""

    # Fixed document-type rows, given as (code, description).
    _rows = [
        _FakeDoc(code, description)
        for code, description in (
            ("PASS", "Passport"),
            ("IDCRD", "Identity Card"),
            ("DRVLC", "Driving Licence"),
            ("RESID", "Residence Permit"),
        )
    ]

    def only(self, *_fields):
        """Ignore the requested fields and return the row list itself."""
        return self._rows


# ---------------------------------------------------------------------------
# Install mocks before importing project modules
# ---------------------------------------------------------------------------
# Stand-in ``istat`` package and ``istat.models`` module.  The two model
# classes expose only an ``objects`` attribute backed by the fake managers.
_mock_istat = types.ModuleType("istat")
_mock_models = types.ModuleType("istat.models")
_mock_models.IstatCountry = type(
    "IstatCountry", (), {"objects": _FakeCountryMgr()}
)
_mock_models.IstatDocumentType = type(
    "IstatDocumentType", (), {"objects": _FakeDocMgr()}
)

# The import system binds a submodule onto its parent only when it loads the
# submodule itself.  Both modules are injected straight into sys.modules, so
# do that binding by hand; otherwise ``from istat import models`` and
# ``import istat.models; istat.models.X`` would fail against the mock.
_mock_istat.models = _mock_models

# Register the parent before the child, then any later ``istat`` import in the
# project modules is served from these entries.
sys.modules["istat"] = _mock_istat
sys.modules["istat.models"] = _mock_models

# ---------------------------------------------------------------------------
# Import modules under test (after mocks are in place)
# ---------------------------------------------------------------------------
from services.country_snapshot import (  # noqa: E402
    load_country_snapshot,
    reset_country_snapshot,
)
from alloggiati.normalizers import (  # noqa: E402
    clear_normalizer_caches,
    normalize_country_code,
)
from alloggiati.transformer import to_alloggiati_guest  # noqa: E402

# Reset snapshot and caches so mock data is used from scratch.
# Order matters: clear existing state first, then load while the mock
# ``istat.models`` installed above is the entry in sys.modules.
# NOTE(review): presumably load_country_snapshot() reads IstatCountry.objects
# and clear_normalizer_caches() drops memoized lookups — confirm in
# services.country_snapshot / alloggiati.normalizers.
reset_country_snapshot()
clear_normalizer_caches()
load_country_snapshot()

# ---------------------------------------------------------------------------
# Shared booking / structure fixtures
# ---------------------------------------------------------------------------

class _Struct:
    """Structure stub exposing only a fixed ``istat_code``."""

    istat_code: str = "A12345"


class _Booking:
    """Booking stub: fixed uid, one _Struct, and a 2026-07-01 to 2026-07-07 stay."""

    uid = uuid.UUID(hex="12345678-1234-5678-1234-567812345678")
    structure = _Struct()
    check_in_date = datetime.date(year=2026, month=7, day=1)
    check_out_date = datetime.date(year=2026, month=7, day=7)


# The one booking instance that every guest from _make_guest() points at.
_BOOKING = _Booking()


# ---------------------------------------------------------------------------
# Guest factory
# ---------------------------------------------------------------------------

def _make_guest(
    gid: int,
    full_name: str,
    nationality: str,
    gender: str = "male",
    doc_type: str = "passport",
) -> object:
    """Build one guest stub for the batch dataset.

    Args:
        gid: Guest id; also used to derive the document number ``DOC<gid:04d>``.
        full_name: Stored as ``full_name``.
        nationality: Raw nationality input, stored untouched as ``nationality``.
        gender: Stored as ``gender``.
        doc_type: Stored as ``document_type``.

    Returns:
        An instance of a per-call class.  ``id``, ``date_of_birth``,
        ``country_of_birth``, ``document_issuing_country`` and ``booking`` are
        class attributes; everything else is set on the instance.
    """

    class _G:
        # Values that do not vary per guest (apart from the id) sit on the class.
        id = gid
        date_of_birth = datetime.date(1985, 6, 15)
        country_of_birth = "IT"
        document_issuing_country = "IT"
        booking = _BOOKING

    guest = _G()
    document_number = f"DOC{gid:04d}"
    instance_fields = {
        "full_name": full_name,
        "nationality": nationality,
        "gender": gender,
        "document_type": doc_type,
        "id_number": document_number,
        # Mirrors the id number under "document_number"; per the original
        # note this is where get_field_value() looks for it.
        "extra_data": {"document_number": document_number},
    }
    for attr_name, attr_value in instance_fields.items():
        setattr(guest, attr_name, attr_value)
    return guest


# ---------------------------------------------------------------------------
# Test dataset: 30 guests, 5 nationalities, mixed input formats
# ---------------------------------------------------------------------------

# One row per guest: (full name, raw nationality input, gender).
# Guest ids are assigned from row position, starting at 1.
_GUEST_ROWS = (
    # ── USA (ids 1-6) ────────────────────────────────────────────────────────
    ("John Smith",      "US",            "male"),    # ISO2 upper
    ("Jane Doe",        "us",            "female"),  # ISO2 lower
    ("Bob Johnson",     "USA",           "male"),    # ISO3 canonical
    ("Alice Brown",     "United States", "female"),  # full name
    ("Charlie Wilson",  "united states", "male"),    # full name lower
    ("Diana Prince",    "U.S.",          "female"),  # alias
    # ── ITA (ids 7-12) ───────────────────────────────────────────────────────
    ("Mario Rossi",     "IT",            "male"),    # ISO2 upper
    ("Giulia Bianchi",  "it",            "female"),  # ISO2 lower
    ("Luca Ferrari",    "ITA",           "male"),    # ISO3 canonical
    ("Sofia Esposito",  "Italy",         "female"),  # full name
    ("Marco Ricci",     "italy",         "male"),    # full name lower
    ("Anna Romano",     "ITA",           "female"),  # ISO3 repeat
    # ── IND (ids 13-18) ──────────────────────────────────────────────────────
    ("Raj Patel",       "IN",            "male"),    # ISO2 upper
    ("Priya Sharma",    "in",            "female"),  # ISO2 lower
    ("Amit Kumar",      "IND",           "male"),    # ISO3 canonical
    ("Deepa Singh",     "India",         "female"),  # full name
    ("Vikram Nair",     "india",         "male"),    # full name lower
    ("Sunita Gupta",    "IN",            "female"),  # ISO2 repeat
    # ── FRA (ids 19-24) ──────────────────────────────────────────────────────
    ("Pierre Dupont",   "FR",            "male"),    # ISO2 upper
    ("Marie Curie",     "fr",            "female"),  # ISO2 lower
    ("Jean Martin",     "FRA",           "male"),    # ISO3 canonical
    ("Claire Bernard",  "France",        "female"),  # full name
    ("Louis Petit",     "france",        "male"),    # full name lower
    ("Isabelle Moreau", "FR",            "female"),  # ISO2 repeat
    # ── DEU (ids 25-30) ──────────────────────────────────────────────────────
    ("Hans Mueller",    "DE",            "male"),    # ISO2 upper
    ("Greta Schmidt",   "de",            "female"),  # ISO2 lower
    ("Klaus Weber",     "DEU",           "male"),    # ISO3 canonical
    ("Helga Fischer",   "Germany",       "female"),  # full name
    ("Otto Wagner",     "germany",       "male"),    # full name lower
    ("Ingrid Bauer",    "DE",            "female"),  # ISO2 repeat
)

GUESTS = [
    _make_guest(gid, name, raw_nationality, gender)
    for gid, (name, raw_nationality, gender) in enumerate(_GUEST_ROWS, start=1)
]

# Expected ISO3 output for every raw input used in the dataset
# Raw nationality spellings used in the dataset, grouped by the ISO3 code
# each one is expected to normalize to.
_RAW_INPUTS_BY_ISO3: dict[str, tuple[str, ...]] = {
    "USA": ("US", "us", "USA", "United States", "united states", "U.S."),
    "ITA": ("IT", "it", "ITA", "Italy", "italy"),
    "IND": ("IN", "in", "IND", "India", "india"),
    "FRA": ("FR", "fr", "FRA", "France", "france"),
    "DEU": ("DE", "de", "DEU", "Germany", "germany"),
}

# Flat lookup: raw input -> expected ISO3 code.
EXPECTED_ISO3: dict[str, str] = {
    raw: iso3
    for iso3, raw_inputs in _RAW_INPUTS_BY_ISO3.items()
    for raw in raw_inputs
}

# The only ISO3 codes that may appear in a payload produced from the dataset.
ALLOWED_ISO3 = set(_RAW_INPUTS_BY_ISO3)


# ---------------------------------------------------------------------------
# Run batch transformation
# ---------------------------------------------------------------------------

def _print_section(title: str) -> None:
    """Print a report section banner: blank line, rule, indented title, rule."""
    rule = "─" * 70
    print(f"\n{rule}")
    print(f"  {title}")
    print(rule)


def _collect_violations(results_valid: list[dict]) -> list[str]:
    """Return one message per correctness problem in the valid results.

    Two independent checks are made:

    * every valid result carries a nationality from ALLOWED_ISO3 that also
      matches EXPECTED_ISO3 for its raw input (when a mapping is defined);
    * every raw input listed in EXPECTED_ISO3 produced at least one valid
      result.

    Each faulty result is reported exactly once, so the length of the
    returned list is the true number of violations.
    """
    violations: list[str] = []

    # Per-result check: allowed code, and the expected code for this input.
    for r in results_valid:
        nat = r["norm_nat"]
        raw = r["raw_nat"]
        expected = EXPECTED_ISO3.get(raw)
        if nat not in ALLOWED_ISO3:
            violations.append(
                f"  guest_id={r['guest_id']} raw={raw!r} → {nat!r} "
                f"(not in allowed ISO3 set)"
            )
        elif expected and nat != expected:
            violations.append(
                f"  guest_id={r['guest_id']} raw={raw!r} → {nat!r} "
                f"(expected {expected!r})"
            )

    # Coverage check only: value mismatches were already reported above, so
    # repeating that comparison here would double-count them.
    covered_inputs = {r["raw_nat"] for r in results_valid}
    for raw in EXPECTED_ISO3:
        if raw not in covered_inputs:
            violations.append(f"  No valid result for raw input {raw!r}")

    return violations


def run() -> int:
    """Execute the batch test and print a five-section report (A to E).

    Every guest in GUESTS is normalized directly (for the log) and then sent
    through to_alloggiati_guest().  The transformer result is read as a dict
    with a ``"valid"`` flag, plus ``"data"`` (containing ``"nationality"``)
    when valid or ``"reason"`` when not.

    Returns:
        0 when every guest transformed successfully and no correctness
        violation was found (PASS), otherwise 1 (FAIL).
    """

    print()
    print("=" * 70)
    print("  ALLOGGIATI BATCH TRANSFORMATION TEST")
    print("  30 guests · 5 nationalities · mixed input formats")
    print("=" * 70)

    results_valid: list[dict] = []
    results_invalid: list[dict] = []
    norm_log: list[tuple[int, str, str]] = []
    nationality_dist: dict[str, int] = defaultdict(int)

    for guest in GUESTS:
        raw_nat = guest.nationality
        nationality_dist[raw_nat] += 1

        # Log normalization step (required by spec)
        norm_nat = normalize_country_code(raw_nat)
        log.info("nationality resolved: %s → %s", raw_nat, norm_nat)
        norm_log.append((guest.id, raw_nat, norm_nat))

        result = to_alloggiati_guest(guest)

        if result["valid"]:
            results_valid.append(
                {
                    "guest_id": guest.id,
                    "name":     guest.full_name,
                    "raw_nat":  raw_nat,
                    # Taken from the payload, not from the direct call above,
                    # so the checks exercise the full pipeline.
                    "norm_nat": result["data"]["nationality"],
                    "payload":  result["data"],
                }
            )
        else:
            results_invalid.append(
                {
                    "guest_id": guest.id,
                    "name":     guest.full_name,
                    "raw_nat":  raw_nat,
                    "reason":   result["reason"],
                }
            )

    # ── A. Test coverage ──────────────────────────────────────────────────────
    _print_section("A. TEST COVERAGE")
    print(f"  Total guests tested : {len(GUESTS)}")
    print("  Distribution by raw nationality input:")
    for raw, count in sorted(nationality_dist.items()):
        expected = EXPECTED_ISO3.get(raw, "?")
        print(f"    {raw:<24} → expected {expected:<5}  (n={count})")

    # ── B. Transformation results ─────────────────────────────────────────────
    _print_section("B. TRANSFORMATION RESULTS")
    print(f"  Successfully normalized : {len(results_valid)}")
    print(f"  Failed transformations  : {len(results_invalid)}")

    if results_invalid:
        print("\n  FAILED GUESTS:")
        for r in results_invalid:
            print(
                f"    guest_id={r['guest_id']}  name={r['name']!r}  "
                f"raw_nat={r['raw_nat']!r}  reason={r['reason']!r}"
            )

    # ── C. Log summary ────────────────────────────────────────────────────────
    _print_section("C. NORMALIZATION LOG (first 10 entries)")
    for _, raw, norm in norm_log[:10]:
        print(f"    nationality resolved: {raw!r:<24} → {norm!r}")
    if len(norm_log) > 10:
        print(f"    ... ({len(norm_log) - 10} more entries)")

    # ── D. Payload verification ───────────────────────────────────────────────
    _print_section("D. PAYLOAD VERIFICATION — one sample per nationality")

    # Keep the first valid result seen for each normalized nationality.
    seen_iso3: dict[str, dict] = {}
    for r in results_valid:
        seen_iso3.setdefault(r["norm_nat"], r)

    for iso3 in sorted(seen_iso3):
        r = seen_iso3[iso3]
        print(
            f"  guest_id={r['guest_id']}  raw={r['raw_nat']!r:<24} "
            f"payload[nationality]={r['payload']['nationality']!r}"
        )

    # ── E. Correctness validation ─────────────────────────────────────────────
    _print_section("E. CORRECTNESS VALIDATION")

    violations = _collect_violations(results_valid)

    if violations:
        print("  VIOLATIONS FOUND:")
        for v in violations:
            print(v)
    else:
        print("  All 30 guests produced correct ISO3 nationality codes")
        print("  No ISO2 values found in any payload")
        print("  No raw strings leaked into any payload")
        print("  All 5 nationality mappings verified:")
        for raw_ex, iso3_ex in [
            ("US", "USA"), ("IT", "ITA"), ("IN", "IND"),
            ("FR", "FRA"), ("DE", "DEU"),
        ]:
            print(f"       {raw_ex} → {iso3_ex}  OK")

    # ── Final verdict ─────────────────────────────────────────────────────────
    print()
    print("=" * 70)

    all_valid = len(results_valid) == len(GUESTS)
    no_violations = not violations

    if all_valid and no_violations:
        print("  TEST RESULT: PASSED")
        print()
        print("  All 30 guests transformed successfully. ISO2 → ISO3 mapping")
        print("  works correctly for all input formats: ISO2 upper/lower,")
        print("  ISO3 canonical, full country name, and informal aliases.")
        print("  No ISO2 values, no raw strings, no empty nationality fields")
        print("  reached any payload. Pipeline is production-safe.")
        print("=" * 70)
        return 0

    print("  TEST RESULT: FAILED")
    print()
    if not all_valid:
        print(f"  {len(results_invalid)} guest(s) failed transformation.")
    if not no_violations:
        print(f"  {len(violations)} correctness violation(s) detected.")
    print("=" * 70)
    return 1


if __name__ == "__main__":
    # Use run()'s return value (0 = PASS, 1 = FAIL) as the process exit status.
    raise SystemExit(run())
