# This file was extracted from the HV SDK Docusaurus examples.
# It is intended as a downloadable, runnable companion to the documentation.
# Set HSI_EXAMPLE_BASE_DIR and related env vars to use your own data.
# Source page: /hsi/hv_sdk/examples/regression#train-a-regression-model-from-rois

# region: setup
import os
from pathlib import Path

import joblib
import numpy as np
import qtec_hv_sdk as hs
import qtec_hv_sdk.annotations
from qtec_hv_sdk.preprocessing import make_reference
from qtec_hv_sdk.preprocessing import reflectance_calibration

# Folder that relative data paths are resolved against (see path_from_base).
BASE_DIR = Path(os.environ.get("HSI_EXAMPLE_BASE_DIR", "/path/to/HSI_data/datasets"))
if not BASE_DIR.exists():
    raise SystemExit(
        "Run: 'export HSI_EXAMPLE_BASE_DIR=/path/to/HSI_data/' to setup the "
        "folder containing the example datacubes."
    )

# Every input below can be overridden through its environment variable.
DARK_REF = os.environ.get("HSI_EXAMPLE_DARK_REF", "dark_ref.pam")
WHITE_REF = os.environ.get("HSI_EXAMPLE_WHITE_REF", "white_ref.pam")
# NOTE: the model path is used as given (not joined with BASE_DIR), so a
# relative value is written relative to the current working directory.
REGRESSION_MODEL_PATH = Path(
    os.environ.get("HSI_EXAMPLE_REGRESSION_MODEL", "regression_model.joblib")
)
MILK_TRAIN_CUBE = os.environ.get("HSI_EXAMPLE_MILK_TRAIN_CUBE", "milk.pam")
MILK_ANNOTATIONS = os.environ.get("HSI_EXAMPLE_MILK_ANNOTATIONS", "milk_fat_roi.json")
# Name of the annotation property used as the regression target.
TARGET_PROPERTY = os.environ.get("HSI_EXAMPLE_TARGET_PROPERTY", "fat")
# Only annotations whose "type" property equals this value are used.
SAMPLE_TYPE = "milk"


def path_from_base(path) -> Path:
    """Return *path* unchanged if absolute, otherwise joined onto BASE_DIR."""
    path = Path(path)
    if path.is_absolute():
        return path
    return BASE_DIR / path


def required_data_path(path, description) -> Path:
    """Resolve *path* against BASE_DIR and require that it exists.

    Args:
        path: Absolute path, or path relative to BASE_DIR.
        description: Human-readable name of the file, used in the error text.

    Returns:
        The resolved path.

    Raises:
        SystemExit: If the resolved path does not exist.
    """
    resolved = path_from_base(path)
    if not resolved.exists():
        raise SystemExit(
            f"Missing {description}: {resolved}\n"
            "The regression examples use the milk-fat dataset. Set "
            "HSI_EXAMPLE_BASE_DIR to the folder containing the milk cube, "
            "milk_fat_roi.json, and matching dark/white references."
        )
    return resolved


def make_references():
    """Open the dark and white reference cubes and build references from them.

    Returns:
        A ``(dark, white)`` tuple of the values returned by ``make_reference``.
    """
    dark = hs.open(str(required_data_path(DARK_REF, "dark reference")))
    white = hs.open(str(required_data_path(WHITE_REF, "white reference")))
    return make_reference(dark), make_reference(white)


def annotation_value(value):
    """Unwrap an annotation property to a single value.

    ``None`` and plain ``str``/``int``/``float`` values are returned as-is.
    Any other value is indexed with ``[0]`` and its first element returned.

    Returns:
        The scalar itself, the first element of an indexable value, ``None``
        for an empty sequence, or the value unchanged when it cannot be
        indexed positionally.
    """
    if value is None or isinstance(value, (str, int, float)):
        return value
    try:
        return value[0]
    except IndexError:
        # Empty sequence: there is no value to report.
        return None
    except (TypeError, KeyError):
        # Not subscriptable, or a mapping without key 0: hand it back untouched.
        return value


# end region

# region: example
from sklearn.cross_decomposition import PLSRegression
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

dark_ref, white_ref = make_references()


def open_absorbance_cube(cube_name):
    """Open a datacube and convert it to absorbance, ``-log10(reflectance)``.

    The cube is reflectance-calibrated against the module-level white and dark
    references, converted to float32 and clipped to ``[1e-6, 1.0]`` so the
    logarithm never sees zero or negative values.
    """
    img = hs.open(str(required_data_path(cube_name, "milk datacube")))
    reflectance = reflectance_calibration(img, white_ref, dark_ref, clip=True)
    reflectance = reflectance.ensure_dtype(hs.float32).clip(1e-6, 1.0)
    return reflectance.ufunc(
        lambda meta, plane: -np.log10(np.clip(plane, 1e-6, 1.0))
    )


def load_milk_annotations():
    """Open the milk ROI annotations file configured by MILK_ANNOTATIONS."""
    annotations_path = required_data_path(MILK_ANNOTATIONS, "milk annotations JSON")
    return hs.annotations.open(str(annotations_path))


def extract_regression_pixels(cube, ann_file, target_property=TARGET_PROPERTY):
    """Collect per-pixel spectra and target values from labelled milk ROIs.

    Annotations are used only when their "type" property equals SAMPLE_TYPE
    and they carry a non-empty *target_property*; all others are skipped.

    Args:
        cube: Datacube to sample spectra from.
        ann_file: Opened annotations object exposing ``.annotations``.
        target_property: Name of the property holding the regression target.

    Returns:
        ``(pixels, targets)``: the concatenated spectra of all used ROIs and a
        float32 array repeating each ROI's target once per extracted spectrum.

    Raises:
        SystemExit: If no annotation qualifies.
    """
    pixels_list = []
    targets_list = []
    for annot in ann_file.annotations:
        properties = annot.properties
        if annotation_value(properties.get("type")) != SAMPLE_TYPE:
            continue
        # A property that is absent, null or an empty list means the ROI is
        # unlabelled; skip it instead of crashing in float().
        raw_target = annotation_value(properties.get(target_property))
        if raw_target is None:
            continue
        target = float(raw_target)

        selected = cube.select_mask_from_descriptor(annot.descriptor)
        # NOTE(review): presumably the BIP array is (pixels, 1, bands) and the
        # singleton middle axis is dropped here - confirm against the SDK docs.
        spectra = selected.to_numpy_with_interleave(hs.bip)[:, 0, :]
        pixels_list.append(spectra)
        targets_list.append(np.full(spectra.shape[0], target, dtype=np.float32))

    if not pixels_list:
        raise SystemExit("No labelled milk ROIs found in the annotations file.")

    return np.concatenate(pixels_list), np.concatenate(targets_list)


milk_annotations = load_milk_annotations()
train_absorbance = open_absorbance_cube(MILK_TRAIN_CUBE)
reg_pixels, reg_targets = extract_regression_pixels(train_absorbance, milk_annotations)

reg = make_pipeline(
    StandardScaler(),
    PLSRegression(n_components=8),
)
reg.fit(reg_pixels, reg_targets)

# Create the output folder first so a custom HSI_EXAMPLE_REGRESSION_MODEL path
# pointing into a not-yet-existing directory does not make the dump fail.
REGRESSION_MODEL_PATH.parent.mkdir(parents=True, exist_ok=True)
joblib.dump(reg, REGRESSION_MODEL_PATH)

print(f"PLS training pixels: {reg_pixels.shape[0]}")
print(f"{TARGET_PROPERTY} range: {reg_targets.min():.2f} to {reg_targets.max():.2f}")
print(f"Saved regressor to {REGRESSION_MODEL_PATH}")
# end region