# This file was extracted from the HV SDK Docusaurus examples.
# It is intended as a downloadable, runnable companion to the documentation.
# Set HSI_EXAMPLE_BASE_DIR and related env vars to use your own data.
# Source page: /hsi/hv_sdk/examples/regression#evaluate-training-fit-on-rois

# region: setup
import os
from pathlib import Path

import joblib
import numpy as np
import qtec_hv_sdk as hs
import qtec_hv_sdk.annotations
from qtec_hv_sdk.preprocessing import make_reference
from qtec_hv_sdk.preprocessing import reflectance_calibration


# Folder holding the example datacubes. Relative data paths are joined onto it
# by path_from_base(); the script stops immediately when it does not exist.
BASE_DIR = Path(os.getenv("HSI_EXAMPLE_BASE_DIR", "/path/to/HSI_data/datasets"))
if not BASE_DIR.exists():
    raise SystemExit(
        "Run: 'export HSI_EXAMPLE_BASE_DIR=/path/to/HSI_data/' to setup the "
        "folder containing the example datacubes."
    )

# Calibration references (file names, or absolute paths).
DARK_REF = os.getenv("HSI_EXAMPLE_DARK_REF", "dark_ref.pam")
WHITE_REF = os.getenv("HSI_EXAMPLE_WHITE_REF", "white_ref.pam")

# Training cube, its ROI annotations and the annotation property used as the
# regression target. Only annotations whose "type" equals SAMPLE_TYPE are used.
MILK_TRAIN_CUBE = os.getenv("HSI_EXAMPLE_MILK_TRAIN_CUBE", "milk.pam")
MILK_ANNOTATIONS = os.getenv("HSI_EXAMPLE_MILK_ANNOTATIONS", "milk_fat_roi.json")
TARGET_PROPERTY = os.getenv("HSI_EXAMPLE_TARGET_PROPERTY", "fat")
SAMPLE_TYPE = "milk"

# Saved model file. Unlike the data files above it is used as given and is not
# joined onto BASE_DIR.
REGRESSION_MODEL_PATH = Path(os.getenv("HSI_EXAMPLE_REGRESSION_MODEL", "regression_model.joblib"))


def path_from_base(path):
    """Resolve *path* against BASE_DIR unless it is already absolute.

    Args:
        path: String or path-like location of a data file.

    Returns:
        A ``Path``: the input itself when it is absolute, otherwise
        ``BASE_DIR / path``.
    """
    candidate = Path(path)
    return candidate if candidate.is_absolute() else BASE_DIR / candidate


def required_data_path(path, description):
    """Resolve a data file via path_from_base and insist that it exists.

    Args:
        path: File name relative to BASE_DIR, or an absolute path.
        description: Human-readable name of the file, used in the error text.

    Returns:
        The resolved ``Path`` when it exists on disk.

    Raises:
        SystemExit: If the resolved path does not exist; the message names the
            missing item and the path that was checked.
    """
    resolved = path_from_base(path)
    if resolved.exists():
        return resolved

    message = (
        f"Missing {description}: {resolved}\n"
        "The regression examples use the milk-fat dataset. Set "
        "HSI_EXAMPLE_BASE_DIR to the folder containing the milk cube, "
        "milk_fat_roi.json, and matching dark/white references."
    )
    raise SystemExit(message)


def make_references():
    """Open the dark and white reference cubes and build references from them.

    Both files are located with required_data_path, so a missing file ends the
    script with SystemExit.

    Returns:
        A ``(dark, white)`` tuple holding the results of ``make_reference`` for
        the dark and the white cube, in that order.
    """
    dark_path = required_data_path(DARK_REF, "dark reference")
    dark_cube = hs.open(str(dark_path))
    white_path = required_data_path(WHITE_REF, "white reference")
    white_cube = hs.open(str(white_path))

    dark_reference = make_reference(dark_cube)
    white_reference = make_reference(white_cube)
    return dark_reference, white_reference


def annotation_value(value):
    """Return the scalar carried by an annotation property value.

    ``None``, strings, ints and floats are returned unchanged (strings are
    deliberately not indexed). Any other value is treated as a container and
    its element at index ``0`` is returned.

    Args:
        value: Raw property value taken from an annotation.

    Returns:
        ``value`` itself for ``None``/str/int/float, ``value[0]`` for indexable
        containers, and ``value`` unchanged when index ``0`` cannot be read.
    """
    if value is None or isinstance(value, (str, int, float)):
        return value
    try:
        return value[0]
    except (TypeError, LookupError):
        # TypeError: the object is not subscriptable.
        # LookupError (IndexError/KeyError): nothing is stored at index 0, e.g.
        # an empty list or a mapping without key 0. Fall back to the raw value
        # instead of letting one odd annotation abort the whole script.
        return value


def load_regression_model():
    """Load the saved regression model from REGRESSION_MODEL_PATH.

    Returns:
        The object stored in the joblib file.

    Raises:
        SystemExit: If REGRESSION_MODEL_PATH does not exist.
    """
    if REGRESSION_MODEL_PATH.exists():
        # NOTE: joblib files are pickle-based, so loading one can execute
        # arbitrary code. Only point this at model files you trust.
        return joblib.load(REGRESSION_MODEL_PATH)

    message = (
        f"Regression model not found at {REGRESSION_MODEL_PATH}. "
        "Run 01_train_a_regression_model_from_rois.py first, or set HSI_EXAMPLE_REGRESSION_MODEL."
    )
    raise SystemExit(message)
# end region

# region: example
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score


# Build the dark/white references once at module level; open_absorbance_cube()
# reads these two globals when it calibrates a cube.
dark_ref, white_ref = make_references()


def open_absorbance_cube(cube_name):
    """Open a milk datacube and convert it to absorbance.

    The cube is calibrated against the module-level ``white_ref`` and
    ``dark_ref``, converted to float32, clipped to [1e-6, 1.0] and finally
    mapped through ``-log10``.

    Args:
        cube_name: File name relative to BASE_DIR, or an absolute path.

    Returns:
        The result of applying the ``-log10`` transform to the calibrated cube
        via its ``ufunc`` method.

    Raises:
        SystemExit: Propagated from required_data_path when the file is missing.
    """
    # Bounds applied before the logarithm so log10 never sees zero or
    # negative input.
    floor, ceiling = 1e-6, 1.0

    def to_absorbance(meta, plane):
        # ``meta`` is part of the callback signature but is not needed here.
        return -np.log10(np.clip(plane, floor, ceiling))

    cube_path = required_data_path(cube_name, "milk datacube")
    raw_cube = hs.open(str(cube_path))
    calibrated = reflectance_calibration(raw_cube, white_ref, dark_ref, clip=True)
    bounded = calibrated.ensure_dtype(hs.float32).clip(floor, ceiling)
    return bounded.ufunc(to_absorbance)


def load_milk_annotations():
    """Open the ROI annotations file named by MILK_ANNOTATIONS.

    Returns:
        Whatever ``hs.annotations.open`` returns for the resolved file.

    Raises:
        SystemExit: Propagated from required_data_path when the file is missing.
    """
    return hs.annotations.open(
        str(required_data_path(MILK_ANNOTATIONS, "milk annotations JSON"))
    )


def extract_roi_records(cube, ann_file, target_property=TARGET_PROPERTY):
    """Collect spectra and target value for every labelled ROI of SAMPLE_TYPE.

    An annotation is skipped when its "type" property (unwrapped with
    annotation_value) differs from SAMPLE_TYPE, or when it has no
    ``target_property`` entry.

    Args:
        cube: Datacube the annotation masks are applied to.
        ann_file: Opened annotations object exposing ``annotations``.
        target_property: Name of the property holding the regression target.

    Returns:
        A list of dicts with the keys ``"name"``, ``"spectra"`` and
        ``"target"``. The number in ``"name"`` is the annotation's position in
        ``ann_file.annotations``, so skipped annotations leave gaps.

    Raises:
        SystemExit: If no annotation passes both filters.
    """
    records = []

    for index, annotation in enumerate(ann_file.annotations):
        props = annotation.properties
        is_sample = annotation_value(props.get("type")) == SAMPLE_TYPE
        if not (is_sample and target_property in props):
            continue

        masked = cube.select_mask_from_descriptor(annotation.descriptor)
        # Taking index 0 on the middle axis leaves a 2-D array; presumably
        # (pixels, bands) -- TODO confirm against the SDK documentation.
        pixel_spectra = masked.to_numpy_with_interleave(hs.bip)[:, 0, :]
        label = float(annotation_value(props[target_property]))

        records.append(dict(name=f"ROI {index}", spectra=pixel_spectra, target=label))

    if records:
        return records
    raise SystemExit("No labelled milk ROIs found in the annotations file.")


# Load the saved model and the labelled ROIs of the training cube.
reg = load_regression_model()
milk_annotations = load_milk_annotations()
train_absorbance = open_absorbance_cube(MILK_TRAIN_CUBE)
roi_records = extract_roi_records(train_absorbance, milk_annotations)

# Training-fit check only: the model is scored on the same labelled ROIs it was
# trained from. A real validation score needs separate scans/ROIs.
pixel_true, pixel_pred = [], []
roi_true, roi_pred = [], []

for record in roi_records:
    target = record["target"]
    predictions = reg.predict(record["spectra"]).ravel()
    roi_mean = predictions.mean()
    # Every pixel of an ROI shares the ROI's single target value.
    target_values = np.full(predictions.shape, target, dtype=np.float32)

    pixel_true.append(target_values)
    pixel_pred.append(predictions)
    roi_true.append(target)
    roi_pred.append(float(roi_mean))

    lowest, highest = predictions.min(), predictions.max()
    print(
        f"{record['name']}: target={target:.2f}, "
        f"predicted mean={roi_mean:.2f}, "
        f"pixel range={lowest:.2f} to {highest:.2f}"
    )

# Flatten the per-ROI pieces into one array per series.
pixel_true = np.concatenate(pixel_true)
pixel_pred = np.concatenate(pixel_pred)
roi_true = np.asarray(roi_true)
roi_pred = np.asarray(roi_pred)

pixel_rmse = np.sqrt(mean_squared_error(pixel_true, pixel_pred))
roi_rmse = np.sqrt(mean_squared_error(roi_true, roi_pred))

# Same three metrics at both granularities: all pixels, then one mean per ROI.
for heading, y_true, y_pred, rmse in (
    ("\nPixel-level training-fit metrics", pixel_true, pixel_pred, pixel_rmse),
    ("\nROI-mean training-fit metrics", roi_true, roi_pred, roi_rmse),
):
    print(heading)
    print(f"MAE:  {mean_absolute_error(y_true, y_pred):.2f}")
    print(f"RMSE: {rmse:.2f}")
    print(f"R2:   {r2_score(y_true, y_pred):.3f}")
# end region