import json
import os
import sys
from typing import Any, Dict, List, Tuple

import pandas as pd


# Workbook path that main() falls back to when the CONTROL_PLAN_XLSX
# environment variable is not set.
DEFAULT_PATH = r"D:\Downloads\CONTROL PLAN DEFECT HPML REV (1).xlsx"


def is_mark(v: Any) -> bool:
    """Return True when a worksheet cell value counts as a "marked" cell.

    A cell is marked when its text, ignoring surrounding whitespace and
    letter case, is one of the recognised tokens (check-mark glyphs, YES, Y,
    1, TRUE, T, OK, X), or when it parses as a number equal to 1. The numeric
    case matters because a cell holding ``1`` may arrive as the float ``1.0``,
    whose text form is ``"1.0"``.

    Args:
        v: Raw cell value of any type; ``None`` is treated as unmarked.

    Returns:
        True if the cell is marked, False otherwise (including blank cells
        and NaN).
    """
    if v is None:
        return False
    text = str(v).strip()
    if not text:
        return False
    # Compare upper-cased so "yes", "Ok", "x" and the text of a boolean True
    # ("True") are all recognised.
    if text.upper() in {"√", "✓", "✔", "YES", "Y", "1", "TRUE", "T", "OK", "X"}:
        return True
    try:
        # Catches numeric spellings of one such as "1.0"; "nan" parses but
        # never compares equal to 1.
        return float(text) == 1.0
    except ValueError:
        return False


def clean(s: Any) -> str:
    """Convert a cell or header value to trimmed text.

    Non-breaking spaces (U+00A0) are replaced with ordinary spaces before the
    surrounding whitespace is stripped.

    Args:
        s: Any value; ``None`` and float NaN are treated as "no text".

    Returns:
        The cleaned string, or ``""`` for ``None`` / NaN.
    """
    if s is None:
        return ""
    # NaN is the only float that is not equal to itself. Without this guard
    # an empty cell would come back as the literal text "nan".
    if isinstance(s, float) and s != s:
        return ""
    return str(s).replace("\u00a0", " ").strip()


def main() -> int:
    """Export the control plan's defect -> probable-cause matrix as JSON.

    The workbook path comes from the ``CONTROL_PLAN_XLSX`` environment
    variable (falling back to ``DEFAULT_PATH``) and the sheet name from
    ``CONTROL_PLAN_SHEET``. Every row supplies a category, a cause text and
    one cell per defect column; a marked defect cell (see ``is_mark``) links
    that row's cause to the defect.

    A single JSON object is written to stdout. It maps each defect key to
    ``{"defect_key", "labels", "causes"}`` where ``labels`` is the sorted set
    of the key plus every sheet header that mapped to it, and ``causes`` is a
    list of ``{"category", "cause"}`` entries in sheet order with
    case-insensitive duplicates removed.

    Returns:
        0 on success, intended to be used as the process exit status.

    Raises:
        RuntimeError: If the sheet has fewer than five columns.
    """
    path = os.environ.get("CONTROL_PLAN_XLSX", DEFAULT_PATH)
    # NOTE(review): the default sheet name ends with a space, presumably to
    # match the workbook's tab name exactly - confirm before "tidying" it.
    sheet = os.environ.get("CONTROL_PLAN_SHEET", "Probable Causes Master ")

    df = pd.read_excel(path, sheet_name=sheet, engine="openpyxl")
    cols = list(df.columns)
    if len(cols) < 5:
        raise RuntimeError("Unexpected control plan format: too few columns")

    # Heuristic: category column named "CATEGORIES" (else the second column);
    # the cause text is taken from the third column.
    # NOTE(review): cause_col is fixed at index 2 even when "CATEGORIES" is
    # found at a different position - confirm against the real sheet layout.
    cat_col = "CATEGORIES" if "CATEGORIES" in cols else cols[1]
    cause_col = cols[2]

    # Everything except the first column, the category and the cause column
    # is treated as a defect column.
    defect_cols = [c for c in cols if c not in {cols[0], cat_col, cause_col}]

    # Normalize defect column names to lowercase keys used by rejection_data
    # (problem_<key>). Excel has a mix of short codes ("BH", "Sh") and longer
    # names ("Porosity", "Core shift").
    def keyify(name: str) -> str:
        s = clean(name).lower()
        s = s.replace(" ", "_").replace("-", "_").replace("/", "_")
        s = "".join(ch for ch in s if ch.isalnum() or ch == "_")
        return s.strip("_")

    # Explicit mapping to the app defect keys (from MetallurgyDashboard
    # REJECTION_PROBLEMS). Extend this as needed. Short codes such as "bh"
    # and "sh" are resolved here rather than inside keyify().
    explicit = {
        "bh": "bh",
        "slag": "slag",
        "ph": "ph",
        "porosity": "porocity",  # app uses porocity
        "shrinekage": "dft",
        "shrinakage": "dft",
        "inc": "inc",
        "incl": "inc",
        "rough": "rough_surface",
        "rough_": "rough_surface",
        "sand_wash": "sw",
        "core_shift": "cs",
        "leakage": "lkg",
        "em": "em",
        "crack": "crack",
        "uc": "uc",
        "cold_shut": "csh",
        "broken": "broken",
        "mold_crush": "mould_crush",
        "mould_crush": "mould_crush",
        "dipression": "dipression",
        "bend": "bend",
        "scab": "scab",
        "low_bhn": "low_bhn",
        "high_bhn": "high_bhn",
        "bad_micro": "bad_micro",  # if present in app later
        "carbide": "carbide",      # if present in app later
        "exploded": "other",       # fallback
        "time": "other",
        "sw": "sw",
        "sh": "dft",
    }

    # Resolve each defect header to its app key once, instead of once per row.
    # Headers that normalize to an empty key are dropped.
    column_keys = []
    for dc in defect_cols:
        raw = clean(dc)
        k0 = keyify(raw)
        k = explicit.get(k0, explicit.get(raw.lower(), k0))
        if k:
            column_keys.append((dc, raw, k))

    # Text forms of empty cells; treated as "no value" for both the category
    # and the cause column.
    missing = {"nan", "none"}

    out: Dict[str, Dict[str, Any]] = {}
    labels: Dict[str, set] = {}  # defect key -> every header that mapped to it
    seen: Dict[str, set] = {}    # defect key -> (category, cause) already kept

    current_cat = ""
    for _, row in df.iterrows():
        # A blank category cell inherits the last non-blank category above it.
        # Empty cells must not overwrite it with the text "nan".
        cat = clean(row.get(cat_col))
        if cat and cat.lower() not in missing:
            current_cat = cat

        cause = clean(row.get(cause_col))
        if not cause or cause.lower() in missing:
            continue

        category = current_cat or "Unknown"
        fingerprint = (category.strip().lower(), cause.strip().lower())

        for dc, raw, k in column_keys:
            if not is_mark(row.get(dc)):
                continue
            if k not in out:
                out[k] = {"defect_key": k, "labels": [], "causes": []}
                labels[k] = {k}
                seen[k] = set()
            # Several headers can share one key (e.g. "Sh" and "Shrinekage"),
            # so record every header rather than only the first one seen.
            labels[k].add(raw)
            # De-dup causes case-insensitively, keeping the first occurrence.
            if fingerprint in seen[k]:
                continue
            seen[k].add(fingerprint)
            out[k]["causes"].append({"category": category, "cause": cause})

    for k, entry in out.items():
        entry["labels"] = sorted(labels[k])

    sys.stdout.write(json.dumps(out, ensure_ascii=False))
    return 0


# Script entry point: run main() and hand its return value to the interpreter
# as the process exit status.
if __name__ == "__main__":
    sys.exit(main())

