# This file was extracted from the HV SDK Docusaurus examples.
# It is intended as a downloadable, runnable companion to the documentation.
# Set HSI_EXAMPLE_BASE_DIR and related env vars to use your own data.
# Source page: /hsi/hv_sdk/examples/pca#advanced-memory-efficient-sampling

# region: setup
import os
from pathlib import Path

import joblib
import numpy as np
import qtec_hv_sdk as hs
from qtec_hv_sdk.preprocessing import make_reference
from qtec_hv_sdk.preprocessing import reflectance_calibration
from sklearn.decomposition import PCA

# Folder holding the example datacubes; overridable through the environment.
BASE_DIR = Path(os.environ.get("HSI_EXAMPLE_BASE_DIR", "/path/to/HSI_data/nuts"))
if not BASE_DIR.exists():
    raise SystemExit(
        "Run: 'export HSI_EXAMPLE_BASE_DIR=/path/to/HSI_data/' to setup the "
        "folder containing the example datacubes."
    )

# File names are resolved relative to BASE_DIR; the model path is used as given.
TRAIN_CUBE = os.environ.get("HSI_EXAMPLE_TRAIN_CUBE", "mix1.pam")
DARK_REF = os.environ.get("HSI_EXAMPLE_DARK_REF", "dark_ref.pam")
WHITE_REF = os.environ.get("HSI_EXAMPLE_WHITE_REF", "white_ref.pam")
PCA_MODEL_PATH = Path(os.environ.get("HSI_EXAMPLE_PCA_MODEL", "pca_model.joblib"))


def make_references():
    """Open the dark and white reference cubes and build a reference from each.

    Returns:
        A ``(dark_reference, white_reference)`` tuple, each produced by
        ``make_reference`` from the corresponding cube under ``BASE_DIR``.
    """
    dark = hs.open(str(BASE_DIR / DARK_REF))
    white = hs.open(str(BASE_DIR / WHITE_REF))
    return make_reference(dark), make_reference(white)


def open_reflectance_cube(cube_name=TRAIN_CUBE):
    """Open a cube from ``BASE_DIR`` and return it reflectance-calibrated.

    Args:
        cube_name: File name of the cube, relative to ``BASE_DIR``.

    Returns:
        The result of ``reflectance_calibration`` applied to the opened cube
        with the white and dark references and ``clip=True``.
    """
    dark_ref, white_ref = make_references()
    img = hs.open(str(BASE_DIR / cube_name))
    return reflectance_calibration(img, white_ref, dark_ref, clip=True)
# end region


# region: example
def sample_pixels_with_ufunc(image, n_samples_per_line=10, random_seed=42):
    """Randomly pick a fixed number of pixels from every line of ``image``.

    Args:
        image: HV SDK image to sample from.
        n_samples_per_line: Number of distinct sample positions drawn from
            each line (drawn without replacement).
        random_seed: Seed for the NumPy random generator shared by all lines.

    Returns:
        A 2-D array obtained by flattening every leading axis of the sampled
        data. With the BIP layout requested below the last axis is presumably
        the band axis, i.e. one row per sampled pixel - confirm against the
        SDK documentation.

    Raises:
        ValueError: If ``n_samples_per_line`` is larger than the number of
            samples in a line, since sampling is done without replacement.
    """
    rng = np.random.default_rng(random_seed)

    # BIL gives the ufunc one full line at a time as [bands, samples], so we can
    # sample pixels without materializing the whole crop first.
    image = image.to_interleave(hs.bil)

    def select_pixels(meta, line):
        # `meta` is unused but is part of the callback signature the ufunc
        # invokes this function with.
        n_available = line.shape[1]
        if n_samples_per_line > n_available:
            # Same exception type NumPy would raise, with a message that
            # points at the offending parameter.
            raise ValueError(
                f"n_samples_per_line={n_samples_per_line} exceeds the "
                f"{n_available} samples available per line"
            )
        sample_indices = rng.choice(
            n_available,
            size=n_samples_per_line,
            replace=False,
        )
        return line[:, sample_indices]

    sampled = image.ufunc(select_pixels).to_numpy_with_interleave(hs.bip)
    return sampled.reshape(-1, sampled.shape[-1])


reflectance = open_reflectance_cube()
# Only the first 250 indices along the first axis are used for training.
training_crop = reflectance[0:250, :, :]
sample_pixels = sample_pixels_with_ufunc(
    training_crop,
    n_samples_per_line=10,
    random_seed=42,
)

pca = PCA(n_components=6, random_state=42)
pca.fit(sample_pixels)
joblib.dump(pca, PCA_MODEL_PATH)

print(f"Sample pixels: {sample_pixels.shape[0]}")
print(f"Explained variance: {pca.explained_variance_ratio_}")
print(f"Saved PCA model to {PCA_MODEL_PATH}")
# end region