# Source code for biogeme.likelihood.model_estimation

"""
Run a model estimation using a specified optimization algorithm.

This module defines the interface for executing a Biogeme-compatible
optimization routine and collecting its results.

Michel Bierlaire
Sun Mar 30 16:07:55 2025
"""

import logging
from datetime import datetime
from typing import Any, NamedTuple

import numpy as np
from biogeme_optimization.function import FunctionToMinimize

from biogeme.calculator import (
    CallableExpression,
    CompiledFormulaEvaluator,
    function_from_compiled_formula,
)
from biogeme.default_parameters import ParameterValue
from biogeme.likelihood.negative_likelihood import NegativeLikelihood
from biogeme.optimization import OptimizationAlgorithm

logger = logging.getLogger(__name__)


class AlgorithmResults(NamedTuple):
    """
    Container for the results returned by an optimization algorithm.

    Being a ``NamedTuple``, an instance can be accessed by field name or
    unpacked positionally as ``(solution, optimization_messages, convergence)``.

    :param solution: Optimal values of the parameters as a NumPy array.
    :param optimization_messages: Dictionary with diagnostic messages from
        the optimizer.
    :param convergence: Boolean indicating whether the optimization
        terminated successfully.
    """

    solution: np.ndarray
    optimization_messages: dict[str, Any]
    convergence: bool
def optimization(
    the_algorithm: OptimizationAlgorithm,
    the_function: FunctionToMinimize,
    starting_values: np.ndarray,
    bounds: list[tuple[float, float]],
    variable_names: list[str] | None,
    parameters: dict[str, Any],
) -> AlgorithmResults:
    """
    Run an optimization algorithm to estimate model parameters.

    :param the_algorithm: Optimization algorithm conforming to the Biogeme
        interface. It is called with the keyword arguments ``fct``,
        ``init_betas``, ``bounds``, ``variable_names`` and ``parameters``, and
        must return a 3-tuple ``(x_star, optimization_messages, convergence)``.
    :param the_function: Function to minimize, providing function value and
        derivatives.
    :param starting_values: Initial guess for the optimization variables.
    :param bounds: List of (lower, upper) bounds for each parameter.
    :param variable_names: Names of the variables (used for reporting or
        algorithm diagnostics), or None when the caller chooses not to
        report them.
    :param parameters: Dictionary of additional parameters passed to the
        optimization algorithm.

    :return: An :class:`AlgorithmResults` with:

        - solution: the optimal solution (array of estimated parameters),
        - optimization_messages: the dictionary of diagnostic messages returned
          by the algorithm, with an added ``"Optimization time"`` entry,
        - convergence: a boolean indicating whether the optimization converged
          successfully.
    """
    # Position the objective function at the starting point before the
    # algorithm takes over.
    the_function.set_variables(starting_values)

    start_time = datetime.now()
    x_star, optimization_messages, convergence = the_algorithm(
        fct=the_function,
        init_betas=starting_values,
        bounds=bounds,
        variable_names=variable_names,
        parameters=parameters,
    )
    # The elapsed wall-clock time (a timedelta) is added in place to the
    # dictionary produced by the algorithm.
    optimization_messages["Optimization time"] = datetime.now() - start_time

    return AlgorithmResults(
        solution=x_star,
        optimization_messages=optimization_messages,
        convergence=convergence,
    )
def model_estimation(
    the_algorithm: OptimizationAlgorithm,
    function_evaluator: CompiledFormulaEvaluator,
    parameters: dict[str, ParameterValue],
    some_starting_values: dict[str, float],
    save_iterations_filename: str | None,
) -> AlgorithmResults:
    """
    Estimate a model using the specified optimization algorithm and modeling
    elements.

    This function builds the log-likelihood callable from the compiled formula
    and the starting values, wraps it in a :class:`NegativeLikelihood`
    objective, and delegates the actual optimization to the `optimization`
    routine.

    :param the_algorithm: The optimization algorithm to use.
    :param function_evaluator: Object with the compiled information to evaluate
        the function.
    :param parameters: Dictionary of configuration parameters for the
        estimation. The entry ``'max_number_parameters_to_report'``, if present
        and not None, is read here: variable names are passed to the optimizer
        only when the number of free parameters does not exceed it.
    :param some_starting_values: Initial values for a subset or all of the
        model's free parameters.
    :param save_iterations_filename: If not None, the name of the file where to
        save the best iterations.

    :return: The :class:`AlgorithmResults` produced by `optimization`,
        containing:

        - the optimal parameter values as a NumPy array,
        - a dictionary of optimization diagnostic messages,
        - a boolean indicating whether the optimization converged successfully.
    """
    # All parameter bookkeeping (names, count, bounds, ordering) goes through
    # the expressions registry; fetch it once instead of repeating the chain.
    registry = function_evaluator.model_elements.expressions_registry

    starting_values = registry.complete_dict_of_free_beta_values(
        the_betas=some_starting_values
    )

    the_function: CallableExpression = function_from_compiled_formula(
        the_compiled_function=function_evaluator,
        the_betas=starting_values,
    )
    the_function_to_minimize = NegativeLikelihood(
        dimension=registry.number_of_free_betas,
        loglikelihood=the_function,
        parameters=parameters,
    )
    the_function_to_minimize.set_variables(np.array(list(starting_values.values())))

    if save_iterations_filename is not None:
        the_function_to_minimize.save_iterations(
            filename_for_best_iteration=save_iterations_filename,
            free_betas_names=registry.free_betas_names,
        )

    # Names are handed to the optimizer unless a reporting limit is configured
    # and the model has more free parameters than that limit.
    max_number_parameters_to_report = parameters.get('max_number_parameters_to_report')
    report_names = (
        max_number_parameters_to_report is None
        or registry.number_of_free_betas <= max_number_parameters_to_report
    )
    variable_names = registry.free_betas_names if report_names else None

    return optimization(
        the_algorithm=the_algorithm,
        the_function=the_function_to_minimize,
        starting_values=registry.get_betas_array(starting_values),
        bounds=registry.bounds,
        variable_names=variable_names,
        parameters=parameters,
    )