# This file was extracted from the HV SDK Docusaurus examples.
# It is intended as a downloadable, runnable companion to the documentation.
# Set HSI_EXAMPLE_BASE_DIR and related env vars to use your own data.
# Source page: /hsi/hv_sdk/examples/regression#evaluate-training-fit-on-rois

# region: setup
import os
from pathlib import Path

import joblib
import numpy as np
import qtec_hv_sdk as hs
import qtec_hv_sdk.annotations
from qtec_hv_sdk.preprocessing import make_reference
from qtec_hv_sdk.preprocessing import reflectance_calibration

# Root folder that relative data paths are resolved against (see path_from_base).
BASE_DIR = Path(os.environ.get("HSI_EXAMPLE_BASE_DIR", "/path/to/HSI_data/datasets"))
if not BASE_DIR.exists():
    raise SystemExit(
        "Run: 'export HSI_EXAMPLE_BASE_DIR=/path/to/HSI_data/' to setup the "
        "folder containing the example datacubes."
    )

# Every input below can be overridden through its environment variable.
DARK_REF = os.environ.get("HSI_EXAMPLE_DARK_REF", "dark_ref.pam")
WHITE_REF = os.environ.get("HSI_EXAMPLE_WHITE_REF", "white_ref.pam")
# NOTE: unlike the data files, the model path is NOT resolved against BASE_DIR;
# a relative value is looked up relative to the current working directory.
REGRESSION_MODEL_PATH = Path(os.environ.get("HSI_EXAMPLE_REGRESSION_MODEL", "regression_model.joblib"))
MILK_TRAIN_CUBE = os.environ.get("HSI_EXAMPLE_MILK_TRAIN_CUBE", "milk.pam")
MILK_ANNOTATIONS = os.environ.get("HSI_EXAMPLE_MILK_ANNOTATIONS", "milk_fat_roi.json")
# Name of the annotation property holding the regression target.
TARGET_PROPERTY = os.environ.get("HSI_EXAMPLE_TARGET_PROPERTY", "fat")
# Only annotations whose "type" property equals this value are evaluated.
SAMPLE_TYPE = "milk"


def path_from_base(path):
    """Return *path* unchanged if it is absolute, otherwise joined onto BASE_DIR."""
    path = Path(path)
    if path.is_absolute():
        return path
    return BASE_DIR / path


def required_data_path(path, description):
    """Resolve *path* via path_from_base and make sure it exists.

    *description* is a human-readable label used in the error message.

    Raises:
        SystemExit: if the resolved path does not exist.
    """
    resolved = path_from_base(path)
    if not resolved.exists():
        raise SystemExit(
            f"Missing {description}: {resolved}\n"
            "The regression examples use the milk-fat dataset. Set "
            "HSI_EXAMPLE_BASE_DIR to the folder containing the milk cube, "
            "milk_fat_roi.json, and matching dark/white references."
        )
    return resolved


def make_references():
    """Open the dark and white reference files and return ``(dark, white)`` references.

    Both files are passed through ``make_reference`` from the SDK.
    """
    dark = hs.open(str(required_data_path(DARK_REF, "dark reference")))
    white = hs.open(str(required_data_path(WHITE_REF, "white reference")))
    return make_reference(dark), make_reference(white)


def annotation_value(value):
    """Unwrap an annotation property value.

    ``None``, strings, ints and floats are returned as-is. Anything else is
    indexed with ``[0]`` (so a one-element list yields its element). If that
    indexing is not possible, the value is returned unchanged rather than
    raising, and the caller decides whether it is usable.
    """
    if value is None or isinstance(value, (str, int, float)):
        return value
    try:
        return value[0]
    except (TypeError, IndexError, KeyError):
        # TypeError: not subscriptable. IndexError: empty sequence.
        # KeyError: a mapping without key 0.
        return value


def load_regression_model():
    """Load the saved regression model from REGRESSION_MODEL_PATH.

    Raises:
        SystemExit: if the model file does not exist.
    """
    if not REGRESSION_MODEL_PATH.exists():
        raise SystemExit(
            f"Regression model not found at {REGRESSION_MODEL_PATH}. "
            "Run 01_train_a_regression_model_from_rois.py first, or set HSI_EXAMPLE_REGRESSION_MODEL."
        )
    # SECURITY: joblib.load unpickles the file, which can execute arbitrary
    # code. Only point HSI_EXAMPLE_REGRESSION_MODEL at model files you trust.
    return joblib.load(REGRESSION_MODEL_PATH)
# end region


# region: example
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score

dark_ref, white_ref = make_references()


def open_absorbance_cube(cube_name):
    """Open *cube_name* and return it converted to absorbance.

    The cube is reflectance-calibrated against the module-level white/dark
    references, then transformed as ``-log10(reflectance)``. Reflectance is
    clipped to ``[1e-6, 1.0]`` so the logarithm never sees zero or negative
    values.
    """
    img = hs.open(str(required_data_path(cube_name, "milk datacube")))
    reflectance = reflectance_calibration(img, white_ref, dark_ref, clip=True)
    reflectance = reflectance.ensure_dtype(hs.float32).clip(1e-6, 1.0)
    return reflectance.ufunc(lambda meta, plane: -np.log10(np.clip(plane, 1e-6, 1.0)))


def load_milk_annotations():
    """Open the milk annotations JSON file with the SDK annotations reader."""
    annotations_path = required_data_path(MILK_ANNOTATIONS, "milk annotations JSON")
    return hs.annotations.open(str(annotations_path))


def extract_roi_records(cube, ann_file, target_property=TARGET_PROPERTY):
    """Collect spectra and target values for every labelled milk ROI.

    An annotation is used when its "type" property equals SAMPLE_TYPE and it
    carries *target_property*. ROIs that select no pixels are reported and
    skipped.

    Returns:
        A list of dicts with keys ``"name"``, ``"spectra"`` and ``"target"``.

    Raises:
        SystemExit: if a target value is not numeric, or if no usable ROI is
            found.
    """
    roi_records = []
    for roi_index, annot in enumerate(ann_file.annotations):
        properties = annot.properties
        if annotation_value(properties.get("type")) != SAMPLE_TYPE:
            continue
        if target_property not in properties:
            continue

        raw_target = annotation_value(properties[target_property])
        try:
            target = float(raw_target)
        except (TypeError, ValueError):
            # Fail with a message that names the offending ROI instead of a
            # bare traceback from float().
            raise SystemExit(
                f"ROI {roi_index}: property {target_property!r} has a "
                f"non-numeric value {raw_target!r}."
            ) from None

        selected = cube.select_mask_from_descriptor(annot.descriptor)
        # presumably (pixels, bands) after dropping the middle axis - TODO confirm
        spectra = selected.to_numpy_with_interleave(hs.bip)[:, 0, :]
        if spectra.size == 0:
            # An empty selection cannot be predicted on or summarised.
            print(f"ROI {roi_index}: no pixels selected, skipping.")
            continue

        roi_records.append(
            {
                "name": f"ROI {roi_index}",
                "spectra": spectra,
                "target": target,
            }
        )

    if not roi_records:
        raise SystemExit("No labelled milk ROIs found in the annotations file.")
    return roi_records


def format_r2(y_true, y_pred):
    """Return R2 formatted to three decimals, or a note when it is undefined.

    R2 compares the model against predicting the mean target, so it needs at
    least two samples whose target values are not all identical.
    """
    y_true = np.asarray(y_true)
    if y_true.size < 2 or np.min(y_true) == np.max(y_true):
        return "n/a (needs at least two distinct target values)"
    return f"{r2_score(y_true, y_pred):.3f}"


reg = load_regression_model()
milk_annotations = load_milk_annotations()
train_absorbance = open_absorbance_cube(MILK_TRAIN_CUBE)
roi_records = extract_roi_records(train_absorbance, milk_annotations)

# This is a training-fit check: it measures how well the saved model predicts
# the same labelled ROIs it was trained from. Use separate scans/ROIs for a real
# validation score.
pixel_true = []
pixel_pred = []
roi_true = []
roi_pred = []

for record in roi_records:
    predictions = reg.predict(record["spectra"]).ravel()
    # Every pixel of an ROI shares that ROI's single reference value.
    target_values = np.full(predictions.shape, record["target"], dtype=np.float32)

    pixel_true.append(target_values)
    pixel_pred.append(predictions)
    roi_true.append(record["target"])
    roi_pred.append(float(predictions.mean()))

    print(
        f"{record['name']}: target={record['target']:.2f}, "
        f"predicted mean={predictions.mean():.2f}, "
        f"pixel range={predictions.min():.2f} to {predictions.max():.2f}"
    )

pixel_true = np.concatenate(pixel_true)
pixel_pred = np.concatenate(pixel_pred)
roi_true = np.asarray(roi_true)
roi_pred = np.asarray(roi_pred)

pixel_rmse = np.sqrt(mean_squared_error(pixel_true, pixel_pred))
roi_rmse = np.sqrt(mean_squared_error(roi_true, roi_pred))

print("\nPixel-level training-fit metrics")
print(f"MAE: {mean_absolute_error(pixel_true, pixel_pred):.2f}")
print(f"RMSE: {pixel_rmse:.2f}")
print(f"R2: {format_r2(pixel_true, pixel_pred)}")

print("\nROI-mean training-fit metrics")
print(f"MAE: {mean_absolute_error(roi_true, roi_pred):.2f}")
print(f"RMSE: {roi_rmse:.2f}")
print(f"R2: {format_r2(roi_true, roi_pred)}")
# end region