# Source code for easy_vic_build.tools.calibrate_func.sampling

# code: utf-8
# author: Xudong Zheng
# email: z786909151@163.com

"""
Sampling utilities for calibration and parameter exploration.

This module provides commonly used random and quasi-random sampling routines:

- uniform and Gaussian sampling,
- Latin Hypercube Sampling (LHS),
- Sobol and Halton low-discrepancy sequences,
- discrete sampling with optional constraints.

"""

import random

import numpy as np
from scipy.stats import qmc

from ..params_func.params_set import *


def sampling_uniform(n_samples, bounds):
    """
    Draw independent samples from a continuous uniform distribution.

    Parameters
    ----------
    n_samples : int
        How many values to draw.
    bounds : tuple of float
        Sequence whose first two items are the lower and upper limits
        passed to ``random.uniform``.

    Returns
    -------
    samples : list of float
        ``n_samples`` values drawn with ``random.uniform``.
    """
    samples = []
    for _ in range(n_samples):
        samples.append(random.uniform(bounds[0], bounds[1]))
    return samples
def sampling_uniform_int(n_samples, bounds):
    """
    Draw independent integer samples uniformly from a closed range.

    Parameters
    ----------
    n_samples : int
        How many integers to draw.
    bounds : tuple of int
        Sequence whose first two items are the lower and upper limits
        passed to ``random.randint`` (both endpoints can be returned).

    Returns
    -------
    samples : list of int
        ``n_samples`` integers drawn with ``random.randint``.
    """
    samples = []
    for _ in range(n_samples):
        samples.append(random.randint(bounds[0], bounds[1]))
    return samples
def sampling_gaussian(n_samples, mean, std):
    """
    Draw independent samples from a Gaussian (normal) distribution.

    Parameters
    ----------
    n_samples : int
        How many values to draw.
    mean : float
        Mean of the distribution.
    std : float
        Standard deviation of the distribution.

    Returns
    -------
    samples : list of float
        ``n_samples`` values drawn with ``random.gauss``.
    """
    samples = []
    for _ in range(n_samples):
        samples.append(random.gauss(mean, std))
    return samples
def sampling_gaussian_clip(n_samples, mean, std, low=None, up=None):
    """
    Draw Gaussian samples and optionally clip them to ``[low, up]``.

    Parameters
    ----------
    n_samples : int
        How many values to draw.
    mean : float
        Mean of the distribution.
    std : float
        Standard deviation of the distribution.
    low : float, optional
        Lower clipping limit. ``None`` leaves the lower side unclipped.
    up : float, optional
        Upper clipping limit. ``None`` leaves the upper side unclipped.

    Returns
    -------
    samples : numpy.ndarray
        Values drawn with ``numpy.random.normal``, passed through
        ``numpy.clip`` when at least one limit is given.
    """
    draws = np.random.normal(loc=mean, scale=std, size=n_samples)

    # Nothing to clip against: return the raw draws untouched.
    if low is None and up is None:
        return draws

    return np.clip(draws, low, up)
def sampling_LHS_1(n_samples, n_dimensions, bounds):
    """
    Generate Latin Hypercube samples (hand-rolled variant) within bounds.

    For each dimension the unit interval is cut into ``n_samples`` equal
    strata, one point is drawn uniformly inside every stratum, the points
    are shuffled, and the result is rescaled to that dimension's bounds.

    Parameters
    ----------
    n_samples : int
        Number of samples (rows) to generate.
    n_dimensions : int
        Number of dimensions (columns) per sample.
    bounds : list of tuple
        ``(min, max)`` pair for each dimension, e.g.
        ``[(0, 10), (-5, 5), (100, 200)]``. Indexed by dimension for the
        first ``n_dimensions`` entries.

    Returns
    -------
    samples : numpy.ndarray
        Array of shape ``(n_samples, n_dimensions)``.
    """
    samples = np.zeros((n_samples, n_dimensions))

    # Stratum edges on [0, 1] are the same for every dimension.
    edges = np.linspace(0, 1, n_samples + 1)
    left_edges = edges[:-1]
    right_edges = edges[1:]

    for dim in range(n_dimensions):
        # one uniform draw per stratum, then decouple the dimensions
        unit_points = np.random.uniform(left_edges, right_edges)
        np.random.shuffle(unit_points)

        # rescale from [0, 1] to this dimension's range
        lower, upper = bounds[dim]
        samples[:, dim] = unit_points * (upper - lower) + lower

    return samples
def sampling_LHS_2(n_samples, bounds, seed=None):
    """
    Generate Latin Hypercube samples with SciPy, scaled to the given bounds.

    Parameters
    ----------
    n_samples : int
        Number of samples (rows) to generate.
    bounds : list of tuple
        ``(min, max)`` pair for each dimension, e.g.
        ``[(0, 1), (5, 10), (-5, 5)]``. The number of pairs sets the
        dimensionality.
    seed : int, optional
        Seed forwarded to ``scipy.stats.qmc.LatinHypercube``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_samples, len(bounds))`` with each column
        inside its ``[min, max]`` range.

    Raises
    ------
    ValueError
        If any bound does not satisfy ``min < max``.
    """
    n_dimensions = len(bounds)

    # reject empty or inverted ranges before doing any sampling
    for bound in bounds:
        if bound[0] >= bound[1]:
            raise ValueError("Each bound must satisfy min < max.")

    # draw in the unit hypercube
    engine = qmc.LatinHypercube(d=n_dimensions, seed=seed)
    unit_sample = engine.random(n=n_samples)

    # rescale to the requested ranges
    lower_bounds = np.array([bound[0] for bound in bounds])
    upper_bounds = np.array([bound[1] for bound in bounds])
    scaled_samples = qmc.scale(unit_sample, lower_bounds, upper_bounds)

    # clip so that no value falls outside the bounds after rescaling
    return np.clip(scaled_samples, lower_bounds, upper_bounds)
def sampling_Sobol(n_samples, bounds, seed=None):
    """
    Generate samples from a scrambled Sobol sequence within the given bounds.

    Parameters
    ----------
    n_samples : int
        The number of samples to generate.
    bounds : list of tuple
        A list of ``(min, max)`` tuples, one per dimension. The number of
        tuples sets the dimensionality.
    seed : int, optional
        Seed forwarded to ``scipy.stats.qmc.Sobol`` so that the scrambling,
        and therefore the output, is reproducible. ``None`` (the default)
        keeps the previous non-reproducible behavior.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_samples, len(bounds))`` scaled to the bounds.

    Raises
    ------
    ValueError
        Raised by ``scipy.stats.qmc.scale`` when the bounds are not
        consistent (a lower bound is not below its upper bound).

    Notes
    -----
    See the ``scipy.stats.qmc.Sobol`` documentation regarding sample
    counts that are not a power of two.
    """
    n_dimensions = len(bounds)

    # get bounds
    lower_bounds = np.array([b[0] for b in bounds])
    upper_bounds = np.array([b[1] for b in bounds])

    # draw in the unit hypercube; seed makes the scrambling repeatable
    sobol_sampler = qmc.Sobol(d=n_dimensions, scramble=True, seed=seed)
    samples = sobol_sampler.random(n=n_samples)

    # remapping
    return qmc.scale(samples, lower_bounds, upper_bounds)
def sampling_Halton(n_samples, bounds, seed=None):
    """
    Generate samples from a scrambled Halton sequence within the given bounds.

    Parameters
    ----------
    n_samples : int
        The number of samples to generate.
    bounds : list of tuple
        A list of ``(min, max)`` tuples, one per dimension. The number of
        tuples sets the dimensionality.
    seed : int, optional
        Seed forwarded to ``scipy.stats.qmc.Halton`` so that the scrambling,
        and therefore the output, is reproducible. ``None`` (the default)
        keeps the previous non-reproducible behavior.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_samples, len(bounds))`` scaled to the bounds.

    Raises
    ------
    ValueError
        Raised by ``scipy.stats.qmc.scale`` when the bounds are not
        consistent (a lower bound is not below its upper bound).
    """
    n_dimensions = len(bounds)

    # get bounds
    lower_bounds = np.array([b[0] for b in bounds])
    upper_bounds = np.array([b[1] for b in bounds])

    # draw in the unit hypercube; seed makes the scrambling repeatable
    halton_sampler = qmc.Halton(d=n_dimensions, scramble=True, seed=seed)
    samples = halton_sampler.random(n=n_samples)

    # remapping
    return qmc.scale(samples, lower_bounds, upper_bounds)
def sampling_discrete(discrete_values, n_samples, weights=None):
    """
    Draw random samples from a finite set of values, optionally weighted.

    Parameters
    ----------
    discrete_values : array-like
        The candidate values to sample from.
    n_samples : int
        How many samples to draw.
    weights : array-like, optional
        Selection probabilities, one per entry of ``discrete_values``,
        passed to ``numpy.random.choice`` as ``p``. If None, all values
        are equally likely.

    Returns
    -------
    samples : numpy.ndarray
        The drawn values.
    """
    # ``p=None`` is numpy's default and selects uniformly, so a single call
    # covers both the weighted and the unweighted case.
    return np.random.choice(discrete_values, size=n_samples, p=weights)
def sampling_discrete_constrained(discrete_values, target_sum, n_samples):
    """
    Draw count vectors whose entries always add up to ``target_sum``.

    Each sample is one ``numpy.random.multinomial`` draw of ``target_sum``
    trials over ``len(discrete_values)`` equally likely categories. Only
    the length of ``discrete_values`` is used; its values are not read.

    Parameters
    ----------
    discrete_values : array-like
        Its length sets the number of categories (entries per sample).
    target_sum : int
        The total that every sample's entries sum to.
    n_samples : int
        The number of samples to generate.

    Returns
    -------
    samples : numpy.ndarray
        One row per sample; each row holds ``len(discrete_values)``
        non-negative integer counts summing to ``target_sum``.
    """
    # e.g., discrete_values = np.array([0, 1, 2]), target_sum = 2, n_samples = 10
    draws = [
        np.random.multinomial(
            target_sum, [1 / len(discrete_values)] * len(discrete_values)
        )
        for _ in range(n_samples)
    ]
    return np.array(draws)
def mixed_sampling(n_samples):
    """
    Placeholder for a future mixed sampling strategy.

    Parameters
    ----------
    n_samples : int
        Requested number of samples. Currently unused.

    Returns
    -------
    None
        There is no implementation yet.

    Notes
    -----
    Kept as a reserved interface for future extension.
    """
    return None