"""
storage.py  —  Model artifact storage (Amazon S3 or local filesystem)
=====================================================================
Trained-model artifacts (.pkl pickles + .json) are BINARY/SEMI-STRUCTURED blobs.
They belong in object storage (S3), NOT in Redshift — Redshift is a columnar
analytics warehouse with no binary type (max VARCHAR is 64 KB) and is the wrong
tool for read-modify-write of multi-MB model files.

This module gives train.py and server.py one storage API regardless of where
artifacts live, so you flip backends with a single environment variable.

Backend selection:
    • MODELS_S3_BUCKET set  → store/load in S3
    • otherwise             → local filesystem under MODELS_DIR (legacy behaviour)

Environment variables:
    MODELS_S3_BUCKET   bucket name, e.g. "foundry-ml-models"   (enables S3)
    MODELS_S3_PREFIX   key prefix,  default "models"
    AWS_REGION         e.g. "ap-south-1" (if unset, boto3 resolves the region itself)
    MODELS_DIR         local fallback directory, default "./models"

Credentials come from the standard AWS chain (IAM role / env / ~/.aws) — never
hard-code them. boto3 is only imported when S3 is actually used.
"""

from __future__ import annotations

import io
import json
import logging
import os
from pathlib import Path

from joblib import dump as _joblib_dump, load as _joblib_load

# Bucket that holds the artifacts; an empty value (after trimming) means local mode.
S3_BUCKET = os.getenv("MODELS_S3_BUCKET", "").strip()
# Key prefix inside the bucket, kept without surrounding whitespace or slashes.
S3_PREFIX = os.getenv("MODELS_S3_PREFIX", "models").strip().strip("/")
# Root directory of the local-filesystem backend.
LOCAL_DIR = Path(os.getenv("MODELS_DIR", "./models"))

# Backend switch, decided once at import time from the bucket setting.
_USE_S3 = S3_BUCKET != ""
# Cached S3 client; stays None until _client() builds it.
_s3_client = None


def using_s3() -> bool:
    """Return the module-level ``_USE_S3`` flag (True when a bucket is configured)."""
    return _USE_S3


def backend_description() -> str:
    """Describe where artifacts are stored, for logs and diagnostics.

    Returns:
        ``s3://<bucket>/<prefix>`` when the S3 backend is active, otherwise
        the resolved (absolute) local models directory as a string.
    """
    if not _USE_S3:
        return str(LOCAL_DIR.resolve())
    return f"s3://{S3_BUCKET}/{S3_PREFIX}"


def _client():
    """Return the module-wide boto3 S3 client, creating it on first use.

    The client is cached in ``_s3_client`` so later calls reuse it. The region
    comes from ``AWS_REGION``; an unset or empty value is passed as ``None``.
    """
    global _s3_client
    if _s3_client is not None:
        return _s3_client

    # Deferred import: local mode must work without boto3 installed.
    import boto3

    _s3_client = boto3.client("s3", region_name=os.environ.get("AWS_REGION") or None)
    return _s3_client


def _key(plant: str, name: str) -> str:
    """Build the S3 object key ``<S3_PREFIX>/<PLANT>/<name>`` for an artifact.

    The plant code is upper-cased. NOTE: when ``S3_PREFIX`` is empty the
    resulting key starts with ``/``.
    """
    plant_code = plant.upper()
    return f"{S3_PREFIX}/{plant_code}/{name}"


def _local_path(plant: str, name: str) -> Path:
    """Return the local path ``LOCAL_DIR/<PLANT>/<name>`` (plant upper-cased)."""
    return LOCAL_DIR.joinpath(plant.upper(), name)


# ── Pickle artifacts (joblib) ─────────────────────────────────────────────────
def save_pickle(plant: str, name: str, obj) -> None:
    """Serialize *obj* with joblib and store it as artifact *name* of *plant*.

    S3 backend: the object is dumped into an in-memory buffer and uploaded
    with one ``put_object`` call.

    Local backend: the file is written under ``LOCAL_DIR/<PLANT>/`` (parent
    directories are created as needed). The dump goes to a temporary sibling
    file that is renamed over the final path only after it completed, so an
    interrupted or failed dump never leaves a truncated artifact under the
    real name.

    Raises:
        Whatever joblib, boto3 or the filesystem raise; nothing is swallowed.
    """
    if _USE_S3:
        buf = io.BytesIO()
        _joblib_dump(obj, buf)
        # getvalue() returns the full buffer regardless of the stream position.
        _client().put_object(Bucket=S3_BUCKET, Key=_key(plant, name), Body=buf.getvalue())
        return

    target = _local_path(plant, name)
    target.parent.mkdir(parents=True, exist_ok=True)
    # The temp name keeps the original file name as its tail so the extension
    # (which joblib may use to choose a compressor) is unchanged.
    tmp = target.with_name(f".tmp-{os.getpid()}-{target.name}")
    try:
        _joblib_dump(obj, tmp)
        os.replace(tmp, target)
    except BaseException:
        # Best-effort cleanup of the partial temp file; the original error wins.
        try:
            tmp.unlink()
        except OSError:
            pass
        raise


def load_pickle(plant: str, name: str):
    """Return the deserialized object, or None if the artifact is absent.

    S3 backend: any failure of ``get_object`` still yields ``None`` (callers
    rely on that), but only a genuine "not found" response is silent. Every
    other failure (credentials, permissions, network, missing boto3, ...) is
    logged as a warning so it is not mistaken for a missing artifact.

    SECURITY: joblib/pickle deserialization can execute arbitrary code. Only
    point this module at a bucket or directory whose contents are trusted.
    """
    if _USE_S3:
        key = _key(plant, name)
        try:
            resp = _client().get_object(Bucket=S3_BUCKET, Key=key)
        except Exception as err:
            # botocore's ClientError exposes the service error code through
            # ``err.response``; read it defensively so botocore need not be
            # imported here.
            response = getattr(err, "response", None)
            error = response.get("Error") if isinstance(response, dict) else None
            code = error.get("Code") if isinstance(error, dict) else None
            if code not in ("NoSuchKey", "404", "NotFound"):
                logging.getLogger(__name__).warning(
                    "S3 get_object failed for s3://%s/%s; treating artifact as absent",
                    S3_BUCKET, key, exc_info=True,
                )
            return None
        return _joblib_load(io.BytesIO(resp["Body"].read()))

    p = _local_path(plant, name)
    if not p.exists():
        return None
    return _joblib_load(p)


# ── JSON artifacts ────────────────────────────────────────────────────────────
def save_json(plant: str, name: str, data) -> None:
    """Serialize *data* as indented JSON and store it as artifact *name*.

    S3 backend: uploaded as UTF-8 bytes with content type
    ``application/json``.

    Local backend: written as UTF-8 (matching the S3 branch instead of the
    locale default) under ``LOCAL_DIR/<PLANT>/``. The text goes to a temporary
    sibling file that is renamed over the final path once fully written, so a
    failed write never leaves a truncated JSON file under the real name.

    Raises:
        TypeError / ValueError from ``json.dumps`` for unserializable data,
        plus whatever boto3 or the filesystem raise.
    """
    payload = json.dumps(data, indent=2)
    if _USE_S3:
        _client().put_object(
            Bucket=S3_BUCKET, Key=_key(plant, name),
            Body=payload.encode("utf-8"), ContentType="application/json",
        )
        return

    target = _local_path(plant, name)
    target.parent.mkdir(parents=True, exist_ok=True)
    tmp = target.with_name(f".tmp-{os.getpid()}-{target.name}")
    try:
        tmp.write_text(payload, encoding="utf-8")
        os.replace(tmp, target)
    except BaseException:
        # Best-effort cleanup of the partial temp file; the original error wins.
        try:
            tmp.unlink()
        except OSError:
            pass
        raise


def load_json(plant: str, name: str):
    """Return the parsed JSON artifact, or None if the artifact is absent.

    S3 backend: any failure of ``get_object`` still yields ``None``, but only
    a genuine "not found" response is silent; other failures (credentials,
    permissions, network, missing boto3, ...) are logged as a warning.

    Local backend: the file is parsed from raw bytes, exactly like the S3
    branch, so decoding does not depend on the process locale.

    Raises:
        json.JSONDecodeError: if the stored content is not valid JSON.
    """
    if _USE_S3:
        key = _key(plant, name)
        try:
            resp = _client().get_object(Bucket=S3_BUCKET, Key=key)
        except Exception as err:
            # botocore's ClientError exposes the service error code through
            # ``err.response``; read it defensively so botocore need not be
            # imported here.
            response = getattr(err, "response", None)
            error = response.get("Error") if isinstance(response, dict) else None
            code = error.get("Code") if isinstance(error, dict) else None
            if code not in ("NoSuchKey", "404", "NotFound"):
                logging.getLogger(__name__).warning(
                    "S3 get_object failed for s3://%s/%s; treating artifact as absent",
                    S3_BUCKET, key, exc_info=True,
                )
            return None
        return json.loads(resp["Body"].read())

    p = _local_path(plant, name)
    if not p.exists():
        return None
    return json.loads(p.read_bytes())


# ── Existence checks ──────────────────────────────────────────────────────────
def artifact_exists(plant: str, name: str) -> bool:
    """Return True if artifact *name* of *plant* exists in the active backend.

    S3 backend: issues a ``head_object`` request. Any failure still yields
    ``False``, but only a "not found" response is silent; other failures
    (credentials, permissions, network, missing boto3, ...) are logged as a
    warning so a broken connection is not mistaken for a missing artifact.

    Local backend: checks whether the path exists on disk.
    """
    if not _USE_S3:
        return _local_path(plant, name).exists()

    key = _key(plant, name)
    try:
        _client().head_object(Bucket=S3_BUCKET, Key=key)
    except Exception as err:
        # botocore's ClientError exposes the service error code through
        # ``err.response``; read it defensively so botocore need not be
        # imported here.
        response = getattr(err, "response", None)
        error = response.get("Error") if isinstance(response, dict) else None
        code = error.get("Code") if isinstance(error, dict) else None
        if code not in ("404", "NoSuchKey", "NotFound"):
            logging.getLogger(__name__).warning(
                "S3 head_object failed for s3://%s/%s; reporting artifact as absent",
                S3_BUCKET, key, exc_info=True,
            )
        return False
    return True


def plant_has_models(plant: str) -> bool:
    """Return True if at least one known model pickle exists for *plant*.

    The risk, defect and anomaly models are checked in that order, and the
    check stops at the first artifact found.
    """
    model_files = ("risk_model.pkl", "defect_model.pkl", "anomaly_model.pkl")
    return any(artifact_exists(plant, filename) for filename in model_files)
