Note
Go to the end to download the full example code.
Model estimation
Estimation pipeline for discrete and hybrid choice models.
This module defines the high-level logic used to construct and estimate
choice models and hybrid choice (MIMIC) models using Biogeme. The behavior
of the pipeline is fully controlled by a Config object, allowing
the same code to be reused across multiple experimental configurations.
Depending on the configuration, the module can:
- estimate a standard discrete choice model without latent variables,
- estimate a hybrid choice model with two latent variables and measurement equations,
- perform either maximum likelihood or Bayesian estimation,
- optionally combine the choice model likelihood with the measurement likelihood.
The module is intentionally declarative: model structure is assembled here, while data handling, estimation, and result post-processing are delegated to specialized helper modules.
Michel Bierlaire Thu Dec 25 2025, 08:13:25
from IPython.core.display_functions import display
from biogeme.bayesian_estimation import (
get_pandas_estimated_parameters as get_pandas_bayesian_estimated_parameters,
)
from biogeme.biogeme import BIOGEME
from biogeme.expressions import Expression, MonteCarlo, log
from biogeme.latent_variables import EstimationMode
from biogeme.models import logit, loglogit
from biogeme.results_processing import (
get_pandas_estimated_parameters as get_pandas_ml_estimated_parameters,
)
from choice_model import generate_choice_model
from config import (
Config,
)
from mimic import generate_mimic_model
from optima import Choice, read_data
from read_or_estimate import read_or_estimate
def generate_expression(config: Config) -> Expression:
    """Assemble the Biogeme expression to estimate for a given configuration.

    The utilities of the choice model are always generated first. What is
    returned then depends on three configuration fields:

    - ``config.latent_variables == "zero"``: only the choice model is used
      and the MIMIC model is never built.
    - Otherwise, the measurement equations of the MIMIC model form the
      expression, combined with the choice model only when
      ``config.choice_model == "yes"``.
    - ``config.estimation == "bayes"`` selects the logarithmic form
      (``loglogit``, added to the log measurement equations). Any other
      value selects the maximum likelihood form, where ``logit`` is
      multiplied by the measurement equations and the result is wrapped in
      ``log(MonteCarlo(...))``.

    :param config: Configuration object controlling model structure and
        estimation mode.
    :return: The Biogeme expression to be estimated.
    """
    utilities = generate_choice_model(config=config)

    # Without latent variables there are no measurement equations: the
    # expression is the choice model alone.
    # NOTE(review): ``None`` is passed as the availability argument of
    # ``logit``/``loglogit`` -- presumably "all alternatives available";
    # confirm against the biogeme.models documentation.
    if config.latent_variables == "zero":
        if config.estimation == "bayes":
            return loglogit(utilities, None, Choice)
        choice_probability = logit(utilities, None, Choice)
        return log(MonteCarlo(choice_probability))

    mimic_model = generate_mimic_model(config=config)

    # Bayesian estimation: contributions are summed on the log scale.
    if config.estimation == "bayes":
        log_measurement = mimic_model.log_measurement_equations()
        if config.choice_model == "yes":
            return loglogit(utilities, None, Choice) + log_measurement
        return log_measurement

    # Maximum likelihood: contributions are multiplied, then the product is
    # passed through MonteCarlo before taking the logarithm.
    integrand = mimic_model.measurement_equations()
    if config.choice_model == "yes":
        integrand = logit(utilities, None, Choice) * integrand
    return log(MonteCarlo(integrand))
def estimate_model(config: Config) -> None:
    """Build, estimate (or reload) and report the model described by ``config``.

    Steps, in order:

    1. Build the expression with :func:`generate_expression`.
    2. Map ``config.estimation`` to an :class:`EstimationMode`
       (``"bayes"`` gives ``BAYESIAN``, anything else
       ``MAXIMUM_LIKELIHOOD``).
    3. Read the data and create a :class:`BIOGEME` object named
       ``config.name``.
    4. Call ``read_or_estimate`` with the ``saved_results`` directory, so
       that results already saved on file are read instead of re-estimated.
    5. Print the short summary and display the estimated parameters as a
       pandas table, using the export function matching the estimation mode.

    :param config: Configuration object defining the model specification,
        estimation method, and numerical settings.
    """
    likelihood = generate_expression(config=config)

    if config.estimation == "bayes":
        estimation_mode = EstimationMode.BAYESIAN
    else:
        estimation_mode = EstimationMode.MAXIMUM_LIKELIHOOD

    # Read the data.
    database = read_data()

    # Create the Biogeme object. The warm-up phase is given the same length
    # as the number of retained Bayesian draws per chain.
    draws_per_chain = config.number_of_bayesian_draws_per_chain
    the_biogeme = BIOGEME(
        database,
        likelihood,
        warmup=draws_per_chain,
        bayesian_draws=draws_per_chain,
        chains=4,
        number_of_draws=config.number_of_monte_carlo_draws,
        calculating_second_derivatives='never',
        numerically_safe=True,
        max_iterations=5000,
    )
    the_biogeme.model_name = config.name

    # If estimation results are saved on file, they are read to speed up the
    # process. If not, the parameters are estimated.
    results = read_or_estimate(
        the_biogeme=the_biogeme,
        estimation_mode=estimation_mode,
        directory='saved_results',
    )

    print(results.short_summary())

    # Each estimation mode has its own function to export the estimates to a
    # pandas table.
    if estimation_mode == EstimationMode.MAXIMUM_LIKELIHOOD:
        to_pandas = get_pandas_ml_estimated_parameters
    else:
        to_pandas = get_pandas_bayesian_estimated_parameters
    display(to_pandas(estimation_results=results))