# This file was extracted from the HV SDK Docusaurus examples.
# It is intended as a downloadable, runnable companion to the documentation.
# Set HSI_EXAMPLE_BASE_DIR and related env vars to use your own data.
# Source page: /hsi/hv_sdk/examples/classification#train-a-classifier-from-mean-spectra

# region: setup
import os
from pathlib import Path

import numpy as np
import pandas as pd
import qtec_hv_sdk as hs
from qtec_hv_sdk.preprocessing import make_reference
from qtec_hv_sdk.preprocessing import reflectance_calibration
from sklearn.neighbors import NearestCentroid


# Folder holding the example datacubes; override with HSI_EXAMPLE_BASE_DIR.
BASE_DIR = Path(os.getenv("HSI_EXAMPLE_BASE_DIR", "/path/to/HSI_data/nuts"))
if not BASE_DIR.exists():
    # Stop right away with setup instructions instead of failing later on a
    # missing file.
    _missing_base_dir_hint = (
        "Run: 'export HSI_EXAMPLE_BASE_DIR=/path/to/HSI_data/' to setup the "
        "folder containing the example datacubes."
    )
    raise SystemExit(_missing_base_dir_hint)

# File names (joined onto BASE_DIR by the helpers below) and the CSV column
# that carries the class label. Each one can be overridden via its env var.
TEST_CUBE = os.getenv("HSI_EXAMPLE_TEST_CUBE", "mix2.pam")
DARK_REF = os.getenv("HSI_EXAMPLE_DARK_REF", "dark_ref.pam")
WHITE_REF = os.getenv("HSI_EXAMPLE_WHITE_REF", "white_ref.pam")
MEAN_SPECTRA_CSV = os.getenv("HSI_EXAMPLE_MEAN_SPECTRA", "mix1_spectra.csv")
CLASS_PROPERTY = os.getenv("HSI_EXAMPLE_CLASS_PROPERTY", "type")

# End points (nanometres) of the wavelength axis used by wavelengths_for_bands.
WAVELENGTH_MIN_NM = 430.0
WAVELENGTH_MAX_NM = 1700.0


def make_references():
    """Open the dark and white reference cubes and build a reference from each.

    Both files are looked up inside ``BASE_DIR`` using the ``DARK_REF`` and
    ``WHITE_REF`` file names.

    Returns:
        A ``(dark, white)`` tuple holding the results of ``make_reference``
        for the dark cube and the white cube, in that order.
    """
    # Open dark first, then white, before building either reference.
    dark_cube, white_cube = (
        hs.open(str(BASE_DIR / file_name)) for file_name in (DARK_REF, WHITE_REF)
    )
    return make_reference(dark_cube), make_reference(white_cube)


def open_reflectance_cube(cube_name):
    """Open a cube from ``BASE_DIR`` and calibrate it against the references.

    Args:
        cube_name: File name of the datacube, relative to ``BASE_DIR``.

    Returns:
        Whatever ``reflectance_calibration`` returns for the opened cube,
        called with ``clip=True``.
    """
    dark, white = make_references()
    raw_cube = hs.open(str(BASE_DIR / cube_name))
    # Note the argument order: the white reference is passed before the dark one.
    return reflectance_calibration(raw_cube, white, dark, clip=True)


def wavelengths_for_bands(n_bands):
    """Return ``n_bands`` evenly spaced wavelengths covering the configured range.

    The axis runs from ``WAVELENGTH_MIN_NM`` to ``WAVELENGTH_MAX_NM``, both
    end points included.
    """
    # NOTE(review): this models the band centres as linearly spaced; confirm
    # that matches the camera that produced the cubes.
    span = (WAVELENGTH_MIN_NM, WAVELENGTH_MAX_NM)
    return np.linspace(*span, num=n_bands)

# end region

# region: example
def load_mean_spectra(csv_path, n_bands):
    """Load per-ROI mean spectra from a CSV and resample them to ``n_bands``.

    The CSV needs a ``name`` column plus columns whose headers are wavelengths
    written as plain decimal numbers. Rows sharing a name are averaged, and
    each averaged spectrum is linearly interpolated onto the wavelength axis
    given by ``wavelengths_for_bands(n_bands)``.

    Args:
        csv_path: Path to the CSV; a relative path is resolved against
            ``BASE_DIR``.
        n_bands: Number of bands the returned spectra should have.

    Returns:
        A ``(roi_names, values)`` tuple: the distinct names in sorted order
        and a float32 array with one row per name and ``n_bands`` columns.

    Raises:
        SystemExit: If the file is missing, or if it lacks a ``name`` column
            or any wavelength column.
    """
    requested = Path(csv_path)
    source = requested if requested.is_absolute() else BASE_DIR / csv_path
    if not source.exists():
        raise SystemExit("Set HSI_EXAMPLE_MEAN_SPECTRA to the exported mean spectra CSV file.")

    frame = pd.read_csv(source)

    def looks_numeric(column):
        # Digits with at most one decimal point, e.g. "430" or "430.5".
        return column.replace(".", "", 1).isdigit()

    # Order the wavelength columns numerically so they form an increasing axis.
    band_columns = sorted(filter(looks_numeric, frame.columns), key=float)
    if not ("name" in frame.columns and band_columns):
        raise SystemExit(
            "Mean spectra CSV must have a 'name' column and numeric wavelength columns."
        )

    # One averaged spectrum per distinct name, ordered by name.
    per_roi = frame[["name"] + band_columns].groupby("name", sort=True).mean()
    source_wavelengths = np.array(list(map(float, band_columns)))
    target_wavelengths = wavelengths_for_bands(n_bands)

    # np.interp holds the end values for targets outside the CSV's range.
    resampled = []
    for spectrum in per_roi.to_numpy():
        resampled.append(np.interp(target_wavelengths, source_wavelengths, spectrum))
    return per_roi.index.to_numpy(), np.vstack(resampled).astype(np.float32)


# Calibrate the test cube against the dark/white references.
test_reflectance = open_reflectance_cube(TEST_CUBE)

# Per-ROI mean spectra, resampled to the same number of bands as the test cube
# so they can serve as training samples for it.
roi_names, mean_spectra = load_mean_spectra(
    MEAN_SPECTRA_CSV,
    n_bands=test_reflectance.shape.bands,
)

# The class label is a property column in the same CSV; read the raw table
# again to get at it (load_mean_spectra only returns names and spectra).
table = pd.read_csv(BASE_DIR / MEAN_SPECTRA_CSV)
if CLASS_PROPERTY not in table.columns:
    raise SystemExit(
        f"The exported mean spectra file must include a '{CLASS_PROPERTY}' column. "
        "Export spectra from HV Explorer with properties included."
    )

# Map each ROI name to its class and line the labels up with roi_names.
# If a name occurs on several rows, the last row's class wins.
class_by_name = dict(zip(table["name"], table[CLASS_PROPERTY]))
class_labels = np.array([class_by_name[name] for name in roi_names])

# Train on one mean spectrum per ROI.
clf = NearestCentroid()
clf.fit(mean_spectra, class_labels)

# Classify the pixels of a single line of the test cube.
# NOTE(review): line 100 is hard-coded and assumes the cube has more than
# 100 lines; confirm for custom data.
line_index = 100
frame_arr = test_reflectance.array_plane(line_index, hs.lines)
# Transposed so that each row is one sample for predict(); presumably
# array_plane returns (bands, pixels) here; TODO confirm against the SDK.
pixels_frame = frame_arr.T

predicted_class = clf.predict(pixels_frame)
classes, counts = np.unique(predicted_class, return_counts=True)
# tolist() yields plain Python values. astype(int) keeps NumPy scalars, which
# NumPy 2 prints as np.str_(...) / np.int64(...) inside the dict.
print(dict(zip(classes.tolist(), counts.tolist())))
# end region