# Source code for biogeme.likelihood.bootstrap

import logging

from joblib import Parallel, delayed
from tqdm import tqdm
from tqdm_joblib import tqdm_joblib

from biogeme.calculator import CompiledFormulaEvaluator
from biogeme.default_parameters import ParameterValue
from biogeme.model_elements import ModelElements
from biogeme.optimization import OptimizationAlgorithm
from biogeme.second_derivatives import SecondDerivativesMode
from .model_estimation import AlgorithmResults, model_estimation

logger = logging.getLogger(__name__)


def bootstrap(
    number_of_bootstrap_samples: int,
    the_algorithm: OptimizationAlgorithm,
    modeling_elements: ModelElements,
    parameters: dict[str, ParameterValue],
    starting_values: dict[str, float],
    second_derivatives_mode: SecondDerivativesMode,
    numerically_safe: bool,
    use_jit: bool,
    number_of_jobs: int,
) -> list[AlgorithmResults]:
    """Estimate the model on bootstrap samples of the database.

    For each replication, a bootstrap sample is drawn from the database of
    ``modeling_elements``, a fresh formula evaluator is compiled for that
    sample, and the model is estimated with the supplied algorithm.

    :param number_of_bootstrap_samples: Number of bootstrap replications to
        perform.
    :param the_algorithm: The optimization algorithm used to estimate the
        model.
    :param modeling_elements: The components defining the model. Its
        expressions, database, number of draws and user-defined draws are
        reused for every replication.
    :param parameters: Configuration parameters forwarded to the estimation.
    :param starting_values: Initial values of the parameters, used as the
        starting point of every replication.
    :param second_derivatives_mode: Specifies how second derivatives are
        calculated.
    :param numerically_safe: Improves the numerical stability of the
        calculations.
    :param use_jit: If True, performs just-in-time compilation.
    :param number_of_jobs: Number of jobs for the parallel execution of the
        replications (passed to joblib as ``n_jobs``).
    :return: A list with one estimation result per bootstrap replication.
    """
    the_database = modeling_elements.database

    def run_one_bootstrap_estimation(replication_index: int) -> AlgorithmResults:
        """Draw one bootstrap sample and estimate the model on it.

        :param replication_index: Index of the replication. It is not used in
            the computation; it only gives each task a distinct argument.
        :return: The estimation result for this replication.
        """
        # The draws management and the expressions registry of the original
        # model are not carried over; None is passed for both.
        # NOTE(review): presumably they are rebuilt for the resampled
        # database -- confirm against ModelElements.
        bootstrap_modeling_elements = ModelElements(
            expressions=modeling_elements.expressions,
            database=the_database.bootstrap_sample(),
            number_of_draws=modeling_elements.number_of_draws,
            draws_management=None,
            user_defined_draws=modeling_elements.user_defined_draws,
            expressions_registry=None,
            use_jit=use_jit,
        )
        compiled_formula = CompiledFormulaEvaluator(
            model_elements=bootstrap_modeling_elements,
            second_derivatives_mode=second_derivatives_mode,
            numerically_safe=numerically_safe,
        )
        return model_estimation(
            the_algorithm=the_algorithm,
            function_evaluator=compiled_formula,
            parameters=parameters,
            some_starting_values=starting_values,
            save_iterations_filename=None,
        )

    # Developer switch: set to False to run the replications one after the
    # other in the current process (e.g. for debugging).
    run_in_parallel = True

    logger.info('Number of jobs for bootstrapping: %s', number_of_jobs)

    if run_in_parallel:
        # tqdm_joblib hooks the progress bar into joblib so that it advances
        # as parallel tasks complete.
        with tqdm_joblib(
            tqdm(
                desc="Bootstraps",
                total=number_of_bootstrap_samples,
            )
        ):
            return Parallel(n_jobs=number_of_jobs)(
                delayed(run_one_bootstrap_estimation)(replication_index)
                for replication_index in range(number_of_bootstrap_samples)
            )

    return [
        run_one_bootstrap_estimation(replication_index)
        for replication_index in tqdm(range(number_of_bootstrap_samples))
    ]