#!/usr/bin/env python3
"""Ecology data cross-reference validator.

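Validates all cross-references between ecology JSON data files under
public/resources/ecology/. Runtime biome labels declared in
public/resources/worlds/*/runtime_biomes.json are also accepted as biome IDs.
Run after any data change to catch inconsistencies; the exit status is 0 when
every check passes and 1 otherwise.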

Usage:
    python tools/validate-ecology-data.py [--root /path/to/project]
"""

import argparse
import json
import sys
from pathlib import Path


def load_json(path: Path) -> dict | list | None:
    try:
        return json.loads(path.read_text())
    except (FileNotFoundError, json.JSONDecodeError) as e:
        return None


class EcologyValidator:
    def __init__(self, root: Path):
        """Set up data locations and empty result/reference state.

        Args:
            root: Project root; data is read from ``public/resources``
                beneath it.
        """
        resources = root / "public" / "resources"
        self.eco = resources / "ecology"
        self.worlds = resources / "worlds"

        # Running tally of check outcomes plus the collected failure messages.
        self.passed = 0
        self.failed = 0
        self.errors: list[str] = []

        # Reference data; empty until the load_* methods fill it.
        self.biome_ids: set[str] = set()
        self.trait_enums: dict[str, set[str]] = {}
        self.all_trait_values: set[str] = set()

    def ok(self, msg: str) -> None:
        """Count one passed check and print *msg* prefixed with a check mark."""
        self.passed = self.passed + 1
        line = f"  \u2713 {msg}"
        print(line)

    def fail(self, msg: str) -> None:
        """Count one failed check, remember *msg*, and print it with a cross."""
        self.failed = self.failed + 1
        self.errors.append(msg)
        line = f"  \u2717 {msg}"
        print(line)

    def load_biome_ids(self) -> None:
        """Populate ``self.biome_ids`` with every biome ID the data declares.

        IDs are collected from the non-schema JSON files in
        ``<ecology>/biomes`` and from each ``<worlds>/*/runtime_biomes.json``.
        Files that cannot be parsed and aggregate entries that carry no
        ``id`` are reported through ``self.fail`` instead of being skipped
        silently or raising.
        """
        # ecology/biomes/ holds biome definitions in TWO shapes: an aggregate
        # file wrapping a `biomes` array, and per-biome singleton files shaped
        # `{ "id": ..., "name": ..., ... }`. Both are real biome definitions, so
        # the id universe is the union of both (mirrors the file-or-array
        # convention the runtime DataLoader merges).
        biome_dir = self.eco / "biomes"
        for f in sorted(biome_dir.glob("*.json")):
            if "schema" in f.name:
                continue
            data = load_json(f)
            if data is None:
                # The path came from glob(), so None means unreadable/invalid.
                self.fail(f"biomes/{f.name}: file missing or invalid")
                continue
            if not isinstance(data, dict):
                continue
            if "biomes" in data:
                entries = data["biomes"]
                if not isinstance(entries, list):
                    self.fail(f"biomes/{f.name}: 'biomes' is not an array")
                    continue
                for b in entries:
                    # Same guard as the runtime-biome branch below, so one
                    # malformed entry cannot abort the whole run.
                    if isinstance(b, dict) and "id" in b:
                        self.biome_ids.add(b["id"])
                    else:
                        self.fail(f"biomes/{f.name}: biome entry without 'id': {b!r}")
            elif "id" in data:
                self.biome_ids.add(data["id"])

        # Worlds also declare runtime biome labels (e.g. `permanent_ice`,
        # `volcano`) that `normalizes_to` a canonical ecology biome. These are
        # valid keys for biome_trait_weights.json (the weights file already
        # carries other runtime labels like `ice`/`grassland`/`plains`), so the
        # id universe must include them. Without this the validator would flag
        # legitimate runtime-label weights as "unknown".
        if self.worlds.is_dir():
            for f in sorted(self.worlds.glob("*/runtime_biomes.json")):
                data = load_json(f)
                if data is None:
                    self.fail(f"worlds/{f.parent.name}/{f.name}: file missing or invalid")
                    continue
                if not data:
                    continue
                # Accept a wrapper object, a bare list, or a bare mapping.
                runtime = data.get("runtime_biomes", data) if isinstance(data, dict) else data
                if isinstance(runtime, list):
                    for b in runtime:
                        if isinstance(b, dict) and "id" in b:
                            self.biome_ids.add(b["id"])
                elif isinstance(runtime, dict):
                    self.biome_ids.update(runtime.keys())

    def load_trait_enums(self) -> None:
        """Load the trait enumerations from ``traits/trait_definitions.json``.

        Fills ``self.trait_enums`` (category name -> set of allowed values)
        and ``self.all_trait_values`` (union over all categories). A missing,
        unparsable or structurally malformed file is reported through
        ``self.fail`` rather than raising, so the run ends with a readable
        message instead of a traceback.
        """
        data = load_json(self.eco / "traits" / "trait_definitions.json")
        if not data:
            self.fail("trait_definitions.json: file missing or invalid")
            return

        categories = data.get("categories") if isinstance(data, dict) else None
        if not isinstance(categories, dict):
            self.fail("trait_definitions.json: missing 'categories' object")
            return

        for cat, info in categories.items():
            values = info.get("values") if isinstance(info, dict) else None
            # A list contributes its elements, a mapping its keys.
            if not isinstance(values, (list, dict)):
                self.fail(f"trait_definitions.json: category '{cat}' has no 'values' list")
                continue
            vals = set(values)
            self.trait_enums[cat] = vals
            self.all_trait_values.update(vals)

    def check_biome_trait_weight_ids(self) -> None:
        """Check that every top-level key of biome_trait_weights.json is a known biome ID.

        Records a failure when the file cannot be loaded or when any key is
        absent from ``self.biome_ids``; otherwise records one pass.
        """
        weights = load_json(self.eco / "traits" / "biome_trait_weights.json")
        if not weights:
            self.fail("biome_trait_weights.json: file missing or invalid")
            return

        unknown = []
        for biome_id in weights:
            if biome_id not in self.biome_ids:
                unknown.append(biome_id)

        if not unknown:
            self.ok(f"biome_trait_weights: all {len(weights)} biome IDs valid")
            return
        self.fail(f"biome_trait_weights.json: unknown biome IDs: {unknown}")

    def check_biome_trait_weight_values(self) -> None:
        """Check categories and values in biome_trait_weights.json against the trait enums.

        Every category name must be a key of ``self.trait_enums`` and every
        value listed under it must belong to that category. Each problem is
        recorded as its own failure. Returns without recording anything when
        the file cannot be loaded.
        """
        weights = load_json(self.eco / "traits" / "biome_trait_weights.json")
        if not weights:
            return

        problems = []
        for biome, categories in weights.items():
            for cat, vals in categories.items():
                allowed = self.trait_enums.get(cat)
                if allowed is None:
                    problems.append(f"{biome}: unknown category '{cat}'")
                else:
                    problems.extend(
                        f"{biome}.{cat}: unknown value '{v}'"
                        for v in vals
                        if v not in allowed
                    )

        if not problems:
            self.ok("biome_trait_weights: all trait values valid")
            return
        for problem in problems:
            self.fail(f"biome_trait_weights.json: {problem}")

    def check_biome_trait_weight_sums(self) -> None:
        """Check that the weights of every biome/category pair add up to 100.

        Each offending pair is recorded as its own failure. Returns without
        recording anything when the file cannot be loaded.
        """
        data = load_json(self.eco / "traits" / "biome_trait_weights.json")
        if not data:
            return

        # Fractional weights accumulate binary rounding error, so an exact
        # `!= 100` would reject sums that are 100 up to floating-point noise.
        # Integer weights are unaffected by the tolerance.
        tolerance = 1e-9

        bad = []
        for biome, categories in data.items():
            for cat, vals in categories.items():
                total = sum(vals.values())
                if abs(total - 100) > tolerance:
                    bad.append(f"{biome}.{cat} = {total}")

        if bad:
            for b in bad:
                self.fail(f"biome_trait_weights.json: sum != 100: {b}")
        else:
            self.ok("biome_trait_weights: all category sums = 100")

    def check_quality_ranges(self) -> None:
        """Check that aggregate biome entries have a valid ``quality_range``.

        A valid range is a two-element list ``[min, max]`` of numbers with
        ``1 <= min <= max <= 10``. Malformed entries (missing ``id``, null or
        non-numeric range, non-object entry) are reported as failures instead
        of raising.

        Only files wrapping a ``biomes`` array are inspected; singleton biome
        files are skipped here.
        NOTE(review): confirm whether singleton files also carry
        ``quality_range`` and should be validated.
        """
        biome_dir = self.eco / "biomes"
        bad = []
        count = 0
        for f in sorted(biome_dir.glob("*.json")):
            if "schema" in f.name:
                continue
            data = load_json(f)
            if not isinstance(data, dict) or "biomes" not in data:
                continue
            entries = data["biomes"]
            if not isinstance(entries, list):
                bad.append(f"{f.name}: 'biomes' is not an array")
                continue
            for b in entries:
                count += 1
                if not isinstance(b, dict):
                    bad.append(f"{f.name}: biome entry is not an object: {b!r}")
                    continue
                label = f"{b.get('id', '<missing id>')} in {f.name}"
                qr = b.get("quality_range", [])
                if not isinstance(qr, list):
                    bad.append(f"{label}: quality_range is not a list: {qr!r}")
                elif len(qr) != 2:
                    bad.append(f"{label}: quality_range has {len(qr)} elements")
                elif not all(
                    # bool is an int subclass but is not a meaningful bound.
                    isinstance(x, (int, float)) and not isinstance(x, bool)
                    for x in qr
                ):
                    bad.append(f"{label}: quality_range {qr} is not numeric")
                elif not (1 <= qr[0] <= qr[1] <= 10):
                    bad.append(f"{label}: quality_range {qr} violates 1 <= min <= max <= 10")

        if bad:
            for b in bad:
                self.fail(f"biomes: {b}")
        else:
            self.ok(f"biomes: all {count} quality_ranges valid [1..10]")

    def check_food_yield_keys(self) -> None:
        """Check that food_yield.json's ``quality_multipliers`` has exactly the keys "1".."10".

        Missing and unexpected keys are listed in a single failure message.
        A file that cannot be loaded is recorded as a failure too.
        """
        data = load_json(self.eco / "ecosystem" / "food_yield.json")
        if not data:
            self.fail("food_yield.json: file missing or invalid")
            return

        multipliers = data.get("quality_multipliers", {})
        wanted = {str(level) for level in range(1, 11)}
        found = set(multipliers.keys())

        if found == wanted:
            self.ok("food_yield.json: quality_multipliers has keys 1-10")
            return

        parts = []
        absent = wanted - found
        if absent:
            parts.append(f"missing: {sorted(absent)}")
        surplus = found - wanted
        if surplus:
            parts.append(f"extra: {sorted(surplus)}")
        self.fail(f"food_yield.json: quality_multipliers keys: {', '.join(parts)}")

    def check_ability_ids_unique(self) -> None:
        """Check that ability IDs are unique across the ``trait_based`` and ``tier_gated`` sections.

        An ID is listed once for every repeated occurrence after its first.
        A file that cannot be loaded is recorded as a failure.
        """
        data = load_json(self.eco / "fauna" / "abilities.json")
        if not data:
            self.fail("abilities.json: file missing or invalid")
            return

        ability_ids = []
        for section in ("trait_based", "tier_gated"):
            ability_ids.extend(entry["id"] for entry in data.get(section, []))

        already_seen = set()
        repeated = []
        for ability_id in ability_ids:
            if ability_id in already_seen:
                repeated.append(ability_id)
            else:
                already_seen.add(ability_id)

        if not repeated:
            self.ok(f"abilities.json: all {len(ability_ids)} ability IDs unique")
            return
        self.fail(f"abilities.json: duplicate IDs: {repeated}")

    def _check_trait_refs(self, applies_to: dict, context: str) -> list[str]:
        """Collect messages for unknown trait categories/values in *applies_to*.

        Args:
            applies_to: Mapping of trait category -> list of values. The key
                ``min_tier`` is ignored; values that are not lists are not
                inspected.
            context: Prefix identifying the owning entry in each message.

        Returns:
            One message per unknown category or unknown value, in input order.
        """
        problems: list[str] = []
        for cat, vals in applies_to.items():
            if cat == "min_tier":
                continue
            allowed = self.trait_enums.get(cat)
            if allowed is None:
                problems.append(f"{context}: unknown category '{cat}'")
            elif isinstance(vals, list):
                problems.extend(
                    f"{context}: unknown {cat} value '{v}'"
                    for v in vals
                    if v not in allowed
                )
        return problems

    def check_behavior_trait_refs(self) -> None:
        """Check the ``applies_to`` trait references of every behavior in behaviors.json.

        Each unknown category or value becomes its own failure; a file that
        cannot be loaded is recorded as a failure as well.
        """
        data = load_json(self.eco / "fauna" / "behaviors.json")
        if not data:
            self.fail("behaviors.json: file missing or invalid")
            return

        problems = []
        for behavior in data.get("behaviors", []):
            context = f"behavior '{behavior['id']}'"
            problems += self._check_trait_refs(behavior.get("applies_to", {}), context)

        if not problems:
            self.ok("behaviors.json: all trait references valid")
            return
        for problem in problems:
            self.fail(f"behaviors.json: {problem}")

    def check_ability_trait_refs(self) -> None:
        """Check the ``trigger`` trait references of every ability in abilities.json.

        Both the ``trait_based`` and ``tier_gated`` sections are inspected.
        Returns without recording anything when the file cannot be loaded.
        """
        data = load_json(self.eco / "fauna" / "abilities.json")
        if not data:
            return

        problems = []
        for section in ("trait_based", "tier_gated"):
            for ability in data.get(section, []):
                context = f"ability '{ability['id']}'"
                problems += self._check_trait_refs(ability.get("trigger", {}), context)

        if not problems:
            self.ok("abilities.json: all trigger trait references valid")
            return
        for problem in problems:
            self.fail(f"abilities.json: {problem}")

    def check_social_multiplier_keys(self) -> None:
        """Check that social_multipliers.json is keyed only by known ``social`` trait values.

        Inspects ``grazing_multiplier`` and ``typical_group_size`` when
        present. Each unknown key becomes its own failure.
        """
        data = load_json(self.eco / "fauna" / "social_multipliers.json")
        if not data:
            self.fail("social_multipliers.json: file missing or invalid")
            return

        allowed = self.trait_enums.get("social", set())
        problems = []
        for field in ("grazing_multiplier", "typical_group_size"):
            if field in data:
                problems.extend(
                    f"{field}: unknown social value '{key}'"
                    for key in data[field]
                    if key not in allowed
                )

        if not problems:
            self.ok("social_multipliers.json: all social keys valid")
            return
        for problem in problems:
            self.fail(f"social_multipliers.json: {problem}")

    def check_maturity_biome_refs(self) -> None:
        """Check that the keys of ``soil_max_by_biome`` in maturity.json are known biome IDs."""
        data = load_json(self.eco / "flora" / "maturity.json")
        if not data:
            self.fail("maturity.json: file missing or invalid")
            return

        soil_limits = data.get("soil_max_by_biome", {})
        unknown = []
        for biome_id in soil_limits:
            if biome_id not in self.biome_ids:
                unknown.append(biome_id)

        if not unknown:
            self.ok(f"maturity.json: all {len(soil_limits)} soil_max biome IDs valid")
            return
        self.fail(f"maturity.json: soil_max references unknown biomes: {unknown}")

    def check_combat_stats_size_keys(self) -> None:
        """Check that fauna/combat_stats.json only uses known trait enum values.

        Despite the name, this covers more than sizes:

        * keys of ``hp_by_size``, ``atk_size_base``, ``def_by_size`` and
          ``armor_by_size`` against the ``size`` enum;
        * keys of ``atk_diet_multiplier`` against ``diet``;
        * keys of ``movement_by_locomotion`` against ``locomotion``;
        * values in each ``attack_type_rules[*].match`` entry whose key is a
          known trait category (other keys are ignored).

        Each problem is recorded as its own failure.
        """
        data = load_json(self.eco / "fauna" / "combat_stats.json")
        if not data:
            self.fail("combat_stats.json: file missing or invalid")
            return

        # (field in combat_stats.json, trait category its keys must belong to)
        keyed_fields = [
            ("hp_by_size", "size"),
            ("atk_size_base", "size"),
            ("def_by_size", "size"),
            ("armor_by_size", "size"),
            ("atk_diet_multiplier", "diet"),
            ("movement_by_locomotion", "locomotion"),
        ]

        bad = []
        for field, cat in keyed_fields:
            allowed = self.trait_enums.get(cat, set())
            bad.extend(
                f"{field}: unknown {cat} '{k}'"
                for k in data.get(field, {})
                if k not in allowed
            )

        for rule in data.get("attack_type_rules", []):
            for cat, vals in rule.get("match", {}).items():
                allowed = self.trait_enums.get(cat)
                if allowed is None:
                    continue
                # A bare string is ONE value. Iterating it directly would
                # compare single characters and report bogus failures.
                candidates = [vals] if isinstance(vals, str) else vals
                if not isinstance(candidates, list):
                    # Mirror _check_trait_refs: non-list values are not inspected.
                    continue
                for v in candidates:
                    if v not in allowed:
                        bad.append(f"attack_type_rules: unknown {cat} value '{v}'")

        if bad:
            for b in bad:
                self.fail(f"combat_stats.json: {b}")
        else:
            self.ok("combat_stats.json: all enum keys valid")

    def check_constraint_trait_refs(self) -> None:
        """Check that both sides of every ``invalid_pairs`` entry are known trait values.

        The ``a`` and ``b`` fields of each pair are looked up in
        ``self.all_trait_values``; an absent field is treated as the empty
        string and therefore reported as unknown.
        """
        data = load_json(self.eco / "traits" / "trait_constraints.json")
        if not data:
            self.fail("trait_constraints.json: file missing or invalid")
            return

        pairs = data.get("invalid_pairs", [])
        problems = []
        for pair in pairs:
            for side in ("a", "b"):
                value = pair.get(side, "")
                if value not in self.all_trait_values:
                    problems.append(f"invalid_pairs: unknown trait value '{value}'")

        if not problems:
            self.ok(f"trait_constraints.json: all {len(pairs)} constraint refs valid")
            return
        for problem in problems:
            self.fail(f"trait_constraints.json: {problem}")

    def check_grazing_by_size(self) -> None:
        """Check that the keys of ``grazing_by_size`` in fauna/land.json are known sizes."""
        data = load_json(self.eco / "fauna" / "land.json")
        if not data:
            self.fail("land.json: file missing or invalid")
            return

        known_sizes = self.trait_enums.get("size", set())
        unknown = []
        for size_key in data.get("grazing_by_size", {}):
            if size_key not in known_sizes:
                unknown.append(size_key)

        if not unknown:
            self.ok("land.json: grazing_by_size keys match size enum")
            return
        self.fail(f"land.json: grazing_by_size has unknown sizes: {unknown}")

    def check_tier_thresholds(self) -> None:
        """Check ``tier_thresholds`` in health.json and tier_classification.json.

        For each file the keys must be exactly tiers 2..10 (prefixed with
        ``t`` in health.json, unprefixed in tier_classification.json) and the
        values must be strictly increasing in tier order. Each file yields
        one pass or one failure.
        """
        sources = (
            ("ecosystem/health.json", "t"),
            ("ecosystem/tier_classification.json", ""),
        )
        for fname, key_prefix in sources:
            data = load_json(self.eco / fname)
            if not data:
                self.fail(f"{fname}: file missing or invalid")
                continue

            thresholds = data.get("tier_thresholds", {})
            ordered_keys = [f"{key_prefix}{tier}" for tier in range(2, 11)]
            wanted = set(ordered_keys)
            found = set(thresholds.keys())

            if found != wanted:
                parts = []
                absent = wanted - found
                if absent:
                    parts.append(f"missing: {sorted(absent)}")
                surplus = found - wanted
                if surplus:
                    parts.append(f"extra: {sorted(surplus)}")
                self.fail(f"{fname}: tier_thresholds keys: {', '.join(parts)}")
                continue

            values = [thresholds[key] for key in ordered_keys]
            increasing = True
            for lower, higher in zip(values, values[1:]):
                if not lower < higher:
                    increasing = False
                    break

            if increasing:
                self.ok(f"{fname}: tier_thresholds monotonically increasing T2-T10")
            else:
                self.fail(f"{fname}: tier_thresholds not monotonically increasing: {values}")

    def run(self) -> int:
        """Load reference data, run every check, and print a summary.

        Returns:
            0 when no check failed; 1 when at least one failed or when the
            biome IDs / trait definitions could not be loaded.
        """
        print("Loading reference data...")
        self.load_biome_ids()
        self.load_trait_enums()
        print(f"  Found {len(self.biome_ids)} biome IDs, {len(self.trait_enums)} trait categories\n")

        # Every check below compares against these two reference sets.
        if not (self.biome_ids and self.trait_enums):
            print("Cannot proceed without biome IDs and trait definitions.")
            return 1

        print("Running checks...")
        checks = (
            self.check_biome_trait_weight_ids,
            self.check_biome_trait_weight_values,
            self.check_biome_trait_weight_sums,
            self.check_quality_ranges,
            self.check_food_yield_keys,
            self.check_ability_ids_unique,
            self.check_ability_trait_refs,
            self.check_behavior_trait_refs,
            self.check_social_multiplier_keys,
            self.check_maturity_biome_refs,
            self.check_combat_stats_size_keys,
            self.check_constraint_trait_refs,
            self.check_grazing_by_size,
            self.check_tier_thresholds,
        )
        for check in checks:
            check()

        total = self.passed + self.failed
        summary = f"\nRESULT: {self.passed}/{total} checks passed"
        if self.failed:
            summary += f", {self.failed} failed"
        print(summary)

        return 1 if self.failed else 0


def main() -> None:
    """Parse the command line, locate the ecology data, and run the validator.

    Exits with status 1 when the ecology directory does not exist; otherwise
    exits with the status returned by ``EcologyValidator.run``.
    """
    parser = argparse.ArgumentParser(description="Validate ecology data cross-references")
    parser.add_argument(
        "--root",
        type=Path,
        default=None,
        help="Project root (default: auto-detect from script location)",
    )
    args = parser.parse_args()

    # Without --root, use the directory one level above this script's directory.
    root = args.root.resolve() if args.root else Path(__file__).resolve().parent.parent

    eco_dir = root / "public" / "resources" / "ecology"
    if not eco_dir.is_dir():
        print(f"Error: ecology data directory not found at {eco_dir}")
        sys.exit(1)

    print(f"Validating ecology data in {eco_dir}\n")
    exit_code = EcologyValidator(root).run()
    sys.exit(exit_code)


# Run the validator only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()