Model estimation

Estimation pipeline for discrete and hybrid choice models.

This module defines the high-level logic used to construct and estimate choice models and hybrid choice (MIMIC) models using Biogeme. The behavior of the pipeline is fully controlled by a Config object, allowing the same code to be reused across multiple experimental configurations.

Depending on the configuration, the module can:

  • estimate a standard discrete choice model without latent variables,

  • estimate a hybrid choice model with two latent variables and measurement equations,

  • perform either maximum likelihood or Bayesian estimation,

  • optionally combine the choice model likelihood with the measurement likelihood.

The module is intentionally declarative: model structure is assembled here, while data handling, estimation, and result post-processing are delegated to specialized helper modules.

Michel Bierlaire Thu Dec 25 2025, 08:13:25

from IPython.core.display_functions import display
from biogeme.bayesian_estimation import (
    get_pandas_estimated_parameters as get_pandas_bayesian_estimated_parameters,
)
from biogeme.biogeme import BIOGEME
from biogeme.expressions import Expression, MonteCarlo, log
from biogeme.latent_variables import EstimationMode
from biogeme.models import logit, loglogit
from biogeme.results_processing import (
    get_pandas_estimated_parameters as get_pandas_ml_estimated_parameters,
)

from choice_model import generate_choice_model
from config import (
    Config,
)
from mimic import generate_mimic_model
from optima import Choice, read_data
from read_or_estimate import read_or_estimate


def generate_expression(config: Config) -> Expression:
    """Assemble the Biogeme expression that will be estimated.

    The shape of the returned expression depends on three fields of the
    configuration:

    - ``config.latent_variables``: when equal to ``'zero'``, the
      expression contains the discrete choice model only.
    - ``config.estimation``: when equal to ``'bayes'``, the expression is
      a sum of log-likelihood terms (``loglogit`` and the log measurement
      equations). For any other value, the likelihood terms are
      multiplied together and wrapped as ``log(MonteCarlo(...))``.
    - ``config.choice_model``: when equal to ``'yes'`` and latent
      variables are present, the choice model contribution is combined
      with the measurement equations; otherwise the measurement
      equations are returned alone.

    :param config: Configuration object controlling model structure and
        estimation mode.
    :return: The Biogeme expression representing the quantity to be
        estimated.
    """
    utilities = generate_choice_model(config=config)

    # Choice model only: no measurement equations are involved.
    if config.latent_variables == "zero":
        if config.estimation == "bayes":
            return loglogit(utilities, None, Choice)
        choice_probability = logit(utilities, None, Choice)
        return log(MonteCarlo(choice_probability))

    mimic_model = generate_mimic_model(config=config)
    bayesian = config.estimation == "bayes"

    if not bayesian:
        # Maximum likelihood: multiply the likelihood contributions, then
        # take the log of the Monte Carlo integral of the product.
        likelihood = mimic_model.measurement_equations()
        if config.choice_model == "yes":
            likelihood = logit(utilities, None, Choice) * likelihood
        return log(MonteCarlo(likelihood))

    # Bayesian: contributions are already on the log scale, so they add up.
    log_likelihood = mimic_model.log_measurement_equations()
    if config.choice_model == "yes":
        log_likelihood = loglogit(utilities, None, Choice) + log_likelihood
    return log_likelihood


def estimate_model(config: Config) -> None:
    """Run the full estimation workflow for one configuration.

    Steps, in order:

    1. Build the expression to estimate with :func:`generate_expression`.
    2. Select the estimation mode (Bayesian when ``config.estimation`` is
       ``'bayes'``, maximum likelihood otherwise).
    3. Load the data and create a :class:`BIOGEME` object named after
       ``config.name``.
    4. Obtain the results through ``read_or_estimate``, using the
       ``saved_results`` directory.
    5. Print the short summary and display the estimated parameters as a
       pandas table.

    :param config: Configuration object defining the model specification,
        estimation method, and numerical settings.
    """
    likelihood = generate_expression(config=config)

    if config.estimation == "bayes":
        mode = EstimationMode.BAYESIAN
    else:
        mode = EstimationMode.MAXIMUM_LIKELIHOOD

    # Load the data set used for estimation.
    data = read_data()

    # Set up the Biogeme object. The warmup length and the number of
    # retained draws per chain are both taken from the same configuration
    # field.
    model = BIOGEME(
        data,
        likelihood,
        warmup=config.number_of_bayesian_draws_per_chain,
        bayesian_draws=config.number_of_bayesian_draws_per_chain,
        chains=4,
        number_of_draws=config.number_of_monte_carlo_draws,
        calculating_second_derivatives='never',
        numerically_safe=True,
        max_iterations=5000,
    )
    model.model_name = config.name

    # Reuse results saved on file when available to speed up the process;
    # otherwise the parameters are estimated.
    estimation_results = read_or_estimate(
        the_biogeme=model,
        estimation_mode=mode,
        directory='saved_results',
    )

    print(estimation_results.short_summary())

    # Pick the table builder that matches the estimation mode, then show
    # the estimated parameters.
    if mode == EstimationMode.MAXIMUM_LIKELIHOOD:
        build_table = get_pandas_ml_estimated_parameters
    else:
        build_table = get_pandas_bayesian_estimated_parameters
    parameter_table = build_table(estimation_results=estimation_results)
    display(parameter_table)

Gallery generated by Sphinx-Gallery