# This file was extracted from the HV SDK Docusaurus examples.
# It is intended as a downloadable, runnable companion to the documentation.
# Set HSI_EXAMPLE_BASE_DIR and related env vars to use your own data.
# Source page: /hsi/hv_sdk/examples/regression#train-a-regression-model-from-rois

# region: setup
import os
from pathlib import Path

import joblib
import numpy as np
import qtec_hv_sdk as hs
import qtec_hv_sdk.annotations
from qtec_hv_sdk.preprocessing import make_reference
from qtec_hv_sdk.preprocessing import reflectance_calibration


# Root folder that relative data paths are resolved against. Override it with
# the HSI_EXAMPLE_BASE_DIR environment variable.
BASE_DIR = Path(os.getenv("HSI_EXAMPLE_BASE_DIR", "/path/to/HSI_data/datasets"))
if not BASE_DIR.exists():
    # Stop right away with a hint instead of failing later on the first file.
    raise SystemExit(
        "Run: 'export HSI_EXAMPLE_BASE_DIR=/path/to/HSI_data/' to setup the "
        "folder containing the example datacubes."
    )

# Input files: names relative to BASE_DIR, or absolute paths. Each one can be
# overridden through its own environment variable.
DARK_REF = os.getenv("HSI_EXAMPLE_DARK_REF", "dark_ref.pam")
WHITE_REF = os.getenv("HSI_EXAMPLE_WHITE_REF", "white_ref.pam")
MILK_TRAIN_CUBE = os.getenv("HSI_EXAMPLE_MILK_TRAIN_CUBE", "milk.pam")
MILK_ANNOTATIONS = os.getenv("HSI_EXAMPLE_MILK_ANNOTATIONS", "milk_fat_roi.json")

# Destination of the fitted model written at the end of the script.
REGRESSION_MODEL_PATH = Path(os.getenv("HSI_EXAMPLE_REGRESSION_MODEL", "regression_model.joblib"))

# Annotation property used as the regression target, and the value an
# annotation's "type" property must have for the annotation to be used.
TARGET_PROPERTY = os.getenv("HSI_EXAMPLE_TARGET_PROPERTY", "fat")
SAMPLE_TYPE = "milk"


def path_from_base(path):
    """Resolve *path* against ``BASE_DIR`` unless it is already absolute.

    Accepts anything ``Path`` accepts and always returns a ``Path``.
    """
    candidate = Path(path)
    return candidate if candidate.is_absolute() else BASE_DIR / candidate


def required_data_path(path, description):
    """Return the resolved location of a required input file.

    *path* is resolved with ``path_from_base``. When nothing exists at the
    resolved location the script exits with a message that names
    *description* and the path that was checked.
    """
    resolved = path_from_base(path)
    if resolved.exists():
        return resolved

    hint = (
        "The regression examples use the milk-fat dataset. Set "
        "HSI_EXAMPLE_BASE_DIR to the folder containing the milk cube, "
        "milk_fat_roi.json, and matching dark/white references."
    )
    raise SystemExit(f"Missing {description}: {resolved}\n{hint}")


def make_references():
    """Return the ``(dark, white)`` references built from the configured cubes.

    The cube locations come from ``DARK_REF`` and ``WHITE_REF``; the script
    exits if either file is missing.
    """
    sources = (
        (DARK_REF, "dark reference"),
        (WHITE_REF, "white reference"),
    )
    # Both cubes are opened (dark first) before any reference is computed.
    cubes = [hs.open(str(required_data_path(name, label))) for name, label in sources]
    dark_cube, white_cube = cubes
    return make_reference(dark_cube), make_reference(white_cube)


def annotation_value(value):
    """Unwrap an annotation property that may be stored as a sequence.

    ``None``, strings and plain numbers are returned unchanged. Any other
    value is indexed with ``[0]`` and that element is returned. A value that
    cannot supply such an element -- it is not subscriptable, it is an empty
    sequence, or it is a mapping without a ``0`` key -- is returned unchanged
    instead of raising.
    """
    if value is None or isinstance(value, (str, int, float)):
        return value
    try:
        return value[0]
    except (TypeError, LookupError):
        # TypeError: the value is not subscriptable.
        # LookupError: covers IndexError (empty sequence) and KeyError
        # (mapping without key 0), which previously escaped to the caller.
        return value
# end region

# region: example
from sklearn.cross_decomposition import PLSRegression
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler


# Build the dark/white references once, when the script runs;
# open_absorbance_cube() reads them as module-level globals.
dark_ref, white_ref = make_references()


def open_absorbance_cube(cube_name):
    """Open a datacube and convert it to absorbance, ``-log10(reflectance)``.

    The cube is calibrated against the module-level ``white_ref`` and
    ``dark_ref`` references, converted to float32 and clipped to
    ``[1e-6, 1.0]`` before the logarithm is applied. The script exits if the
    cube file is missing.
    """
    def to_absorbance(meta, plane):
        # Clip again right before the logarithm so its argument stays within
        # [1e-6, 1.0]; ``meta`` is not used.
        return -np.log10(np.clip(plane, 1e-6, 1.0))

    cube_path = required_data_path(cube_name, "milk datacube")
    raw = hs.open(str(cube_path))
    calibrated = reflectance_calibration(raw, white_ref, dark_ref, clip=True)
    as_float = calibrated.ensure_dtype(hs.float32)
    bounded = as_float.clip(1e-6, 1.0)
    return bounded.ufunc(to_absorbance)


def load_milk_annotations():
    """Open the annotations file named by ``MILK_ANNOTATIONS``.

    The script exits if the file is missing.
    """
    return hs.annotations.open(
        str(required_data_path(MILK_ANNOTATIONS, "milk annotations JSON"))
    )


def extract_regression_pixels(cube, ann_file, target_property=TARGET_PROPERTY):
    """Collect per-pixel training spectra and targets from the labelled ROIs.

    An annotation is used only when its ``type`` property equals
    ``SAMPLE_TYPE`` and it carries *target_property*. Every row of spectra
    extracted for such an annotation is paired with one copy of that
    annotation's target value.

    Returns a ``(pixels, targets)`` pair of concatenated arrays. Exits the
    script when no annotation qualifies.
    """
    spectra_blocks = []
    target_blocks = []

    for roi in ann_file.annotations:
        props = roi.properties
        if annotation_value(props.get("type")) != SAMPLE_TYPE or target_property not in props:
            continue

        roi_pixels = cube.select_mask_from_descriptor(roi.descriptor)
        # Index 0 on the middle axis leaves a 2-D array whose first axis is
        # used as the sample count below. Assumes that middle axis has
        # length 1 -- TODO confirm against the SDK.
        spectra = roi_pixels.to_numpy_with_interleave(hs.bip)[:, 0, :]
        label = float(annotation_value(props[target_property]))

        spectra_blocks.append(spectra)
        # One float32 target per extracted row, all equal to the ROI's label.
        target_blocks.append(np.full(spectra.shape[0], label, dtype=np.float32))

    if not spectra_blocks:
        raise SystemExit("No labelled milk ROIs found in the annotations file.")
    return np.concatenate(spectra_blocks), np.concatenate(target_blocks)


# Load the ROI annotations and the training cube (converted to absorbance),
# then gather the spectra of the annotated pixels with their target values.
milk_annotations = load_milk_annotations()
train_absorbance = open_absorbance_cube(MILK_TRAIN_CUBE)
reg_pixels, reg_targets = extract_regression_pixels(train_absorbance, milk_annotations)

# Pipeline: StandardScaler followed by a PLS regression with 8 components.
reg = make_pipeline(
    StandardScaler(),
    PLSRegression(n_components=8),
)
reg.fit(reg_pixels, reg_targets)

# Save the whole fitted pipeline (scaler and PLS step together).
# NOTE: unlike the input files, REGRESSION_MODEL_PATH is not resolved against
# BASE_DIR, so a relative path is written relative to the working directory.
joblib.dump(reg, REGRESSION_MODEL_PATH)

# Summary: number of training rows, range of the target values, output path.
print(f"PLS training pixels: {reg_pixels.shape[0]}")
print(f"{TARGET_PROPERTY} range: {reg_targets.min():.2f} to {reg_targets.max():.2f}")
print(f"Saved regressor to {REGRESSION_MODEL_PATH}")
# end region