Logit model

Estimation of a logit model with several algorithms.

Michel Bierlaire, EPFL Wed Jun 18 2025, 10:03:04

import itertools

import pandas as pd
from IPython.core.display_functions import display
from biogeme.biogeme import BIOGEME
from biogeme.exceptions import BiogemeError
from biogeme.expressions import Beta
from biogeme.models import loglogit
from biogeme.tools import format_timedelta
from tqdm import tqdm

See the data processing script: Data preparation for Swissmetro.

from swissmetro_data import (
    CAR_AV_SP,
    CAR_CO_SCALED,
    CAR_TT_SCALED,
    CHOICE,
    SM_AV,
    SM_COST_SCALED,
    SM_TT_SCALED,
    TRAIN_AV_SP,
    TRAIN_COST_SCALED,
    TRAIN_TT_SCALED,
    database,
)

Parameters to be estimated.

asc_car = Beta('asc_car', 0, None, None, 0)
asc_train = Beta('asc_train', 0, None, None, 0)
asc_sm = Beta('asc_sm', 0, None, None, 1)
b_time = Beta('b_time', 0, None, None, 0)
b_cost = Beta('b_cost', 0, None, None, 0)

Definition of the utility functions.

v_train = asc_train + b_time * TRAIN_TT_SCALED + b_cost * TRAIN_COST_SCALED
v_sm = asc_sm + b_time * SM_TT_SCALED + b_cost * SM_COST_SCALED
v_car = asc_car + b_time * CAR_TT_SCALED + b_cost * CAR_CO_SCALED

Associate utility functions with the numbering of alternatives.

V = {1: v_train, 2: v_sm, 3: v_car}

Associate the availability conditions with the alternatives.

av = {1: TRAIN_AV_SP, 2: SM_AV, 3: CAR_AV_SP}

Definition of the model. This is the contribution of each observation to the log likelihood function.

logprob = loglogit(V, av, CHOICE)

Options for the optimization algorithm

The conjugate gradient iteration can be constrained to stay feasible, or not.

infeasible_cg_values = [True, False]

The radius of the first trust region is tested with three different values.

initial_radius_values = [0.1, 1.0, 10.0]

The percentage of iterations such that the analytical second derivatives is evaluated.

second_derivatives_values = [0.0, 0.5, 1.0]

We run the optimization algorithm with all possible combinations of the parameters. The results are stored in a Pandas DataFrame called summary.

results = {}
summary_data = []

The first estimation is performed twice, to warm up the python code, so that the execution times are comparable

first = True
for infeasible_cg, initial_radius, second_derivatives in tqdm(
    itertools.product(
        infeasible_cg_values, initial_radius_values, second_derivatives_values
    )
):
    # Create the Biogeme object
    the_biogeme = BIOGEME(
        database,
        logprob,
        infeasible_cg=infeasible_cg,
        initial_radius=initial_radius,
        second_derivatives=second_derivatives,
        generate_html=False,
        generate_yaml=False,
    )

    name = (
        f'cg_{infeasible_cg}_radius_{initial_radius}_second_deriv_{second_derivatives}'
    )
    the_biogeme.model_name = f'b05normal_mixture_algo_{name}'.strip()
    result_data = {
        'InfeasibleCG': infeasible_cg,
        'InitialRadius': initial_radius,
        'SecondDerivatives': second_derivatives,
        'Status': 'Success',  # Assume success unless an exception is caught
    }

    try:
        results[name] = the_biogeme.estimate()
        if first:
            results[name] = the_biogeme.estimate()
            first = False
        opt_time = format_timedelta(
            results[name].optimization_messages["Optimization time"]
        )

        result_data.update(
            {
                'LogLikelihood': results[name].final_log_likelihood,
                'GradientNorm': results[name].gradient_norm,
                'Number of draws': results[name].number_of_draws,
                'Optimization time': opt_time,
                'TerminationCause': results[name].optimization_messages[
                    "Cause of termination"
                ],
            }
        )

    except BiogemeError as e:
        print(e)
        result_data.update(
            {
                'Status': 'Failed',
                'LogLikelihood': None,
                'GradientNorm': None,
                'Number of draws': None,
                'Optimization time': None,
                'TerminationCause': str(e),
            }
        )
        results[name] = None
    summary_data.append(result_data)

summary = pd.DataFrame(summary_data)
0it [00:00, ?it/s]
1it [00:01,  1.39s/it]
2it [00:02,  1.03s/it]
3it [00:02,  1.08it/s]
4it [00:04,  1.03s/it]
5it [00:04,  1.10it/s]
6it [00:05,  1.19it/s]
7it [00:06,  1.20it/s]
8it [00:07,  1.25it/s]
9it [00:07,  1.29it/s]
10it [00:08,  1.33it/s]
11it [00:09,  1.34it/s]
12it [00:10,  1.29it/s]
13it [00:10,  1.33it/s]
14it [00:11,  1.35it/s]
15it [00:12,  1.37it/s]
16it [00:13,  1.31it/s]
17it [00:13,  1.25it/s]
18it [00:14,  1.29it/s]
18it [00:14,  1.23it/s]
display(summary)
    InfeasibleCG  ...                                   TerminationCause
0           True  ...               Relative gradient = 8e-07 <= 6.1e-06
1           True  ...  Trust region is too small: 1.1920928955078126e-08
2           True  ...  Trust region is too small: 1.1920928955078126e-08
3           True  ...  Trust region is too small: 1.4901161193847656e-08
4           True  ...  Trust region is too small: 1.4901161193847656e-08
5           True  ...  Trust region is too small: 1.4901161193847656e-08
6           True  ...   Trust region is too small: 9.263433906788452e-09
7           True  ...   Trust region is too small: 9.263433906788452e-09
8           True  ...   Trust region is too small: 9.263433906788452e-09
9          False  ...  Trust region is too small: 1.1920928954904653e-08
10         False  ...  Trust region is too small: 1.1920928955077447e-08
11         False  ...  Trust region is too small: 1.1920928955077447e-08
12         False  ...  Trust region is too small: 1.4901161193847656e-08
13         False  ...  Trust region is too small: 1.4901161193847656e-08
14         False  ...  Trust region is too small: 1.4901161193847656e-08
15         False  ...   Trust region is too small: 9.263433906788452e-09
16         False  ...   Trust region is too small: 9.263433906788452e-09
17         False  ...   Trust region is too small: 9.263433906788452e-09

[18 rows x 9 columns]
SUMMARY_FILE = '01logit_all_algos.csv'
summary.to_csv(SUMMARY_FILE, index=False)
print(f'Summary reported in file {SUMMARY_FILE}')
Summary reported in file 01logit_all_algos.csv

Total running time of the script: (0 minutes 14.936 seconds)

Gallery generated by Sphinx-Gallery