Nested logit

Estimation of a nested logit model using sampling of alternatives.

Michel Bierlaire Sat Jul 26 2025, 13:01:22

import pandas as pd
from IPython.core.display_functions import display

import biogeme.biogeme_logging as blog
from alternatives import ID_COLUMN, all_alternatives, alternatives, asian, partitions
from biogeme.biogeme import BIOGEME
from biogeme.expressions import Beta
from biogeme.nests import NestsForNestedLogit, OneNestForNestedLogit
from biogeme.results_processing import get_pandas_estimated_parameters
from biogeme.sampling_of_alternatives import (
    ChoiceSetsGeneration,
    GenerateModel,
    SamplingContext,
    generate_segment_size,
)
from compare import compare
from specification_sampling import V, combined_variables
# Screen logger at INFO level; it is used below to report the sampling context.
logger = blog.get_screen_logger(level=blog.INFO)
# Number of alternatives drawn for the main sample.
SAMPLE_SIZE = 20  # out of 100
# Number of alternatives drawn for the MEV sample.
SAMPLE_SIZE_MEV = 33  # out of 33
# Column of the observations that is passed to the sampling context as the
# choice column.
CHOICE_COLUMN = 'nested_0'
# Keys used to look up the main and the MEV partitions in `partitions`.
PARTITION = 'downtown'
MEV_PARTITION = 'uniform_asian'
# The model name (and therefore the data file name) encodes the partition and
# the sample size.
MODEL_NAME = f'nested_{PARTITION}_{SAMPLE_SIZE}'
FILE_NAME = f'{MODEL_NAME}.dat'
# Fail early with an explicit message if the partition key is unknown.
the_partition = partitions.get(PARTITION)
if the_partition is None:
    raise ValueError(f'Unknown partition: {PARTITION}')
# One sample size per segment of the main partition (10 + 10 in the report
# printed below for the two segments of this partition).
segment_sizes = generate_segment_size(SAMPLE_SIZE, the_partition.number_of_segments())

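For the MEV partition, we use all the alternatives in the nest (33 out of 33), so that no sampling is actually performed within the nest.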

# Fail early with an explicit message if the MEV partition key is unknown.
mev_partition = partitions.get(MEV_PARTITION)
if mev_partition is None:
    raise ValueError(f'Unknown partition: {MEV_PARTITION}')
# A single sample size: the MEV partition is reported below as having one
# segment of size 33, which is fully included in the sample.
mev_segment_sizes = [SAMPLE_SIZE_MEV]
observations = pd.read_csv('obs_choice.dat')
# The context gathers everything needed for the sampling: the two partitions
# with their sample sizes, the observations, the alternatives, the utility
# specification, and the name of the data file to be generated.
context = SamplingContext(
    the_partition=the_partition,
    sample_sizes=segment_sizes,
    individuals=observations,
    choice_column=CHOICE_COLUMN,
    alternatives=alternatives,
    id_column=ID_COLUMN,
    biogeme_file_name=FILE_NAME,
    utility_function=V,
    combined_variables=combined_variables,
    mev_partition=mev_partition,
    mev_sample_sizes=mev_segment_sizes,
)
# Log a summary of the sampling configuration (sizes of the choice set, of the
# partitions and of the samples).
logger.info(context.reporting())
Size of the choice set: 100
Main partition: 2 segment(s) of size 46, 54
Main sample: 20: 10/46, 10/54
Nbr of MEV alternatives: 33
MEV partition: 1 segment(s) of size 33
MEV sample: 33: 33/33
# Both the data generator and the model generator are built from the same
# sampling context.
the_data_generation = ChoiceSetsGeneration(context=context)
the_model_generation = GenerateModel(context=context)
# Sample the alternatives for each observation and build the Biogeme database.
# NOTE(review): recycle=False presumably forces a new sampling instead of
# reusing an existing data file -- confirm against the Biogeme documentation.
biogeme_database = the_data_generation.sample_and_merge(recycle=False)
Generating 20 + 33 alternatives for 10000 observations

  0%|          | 0/10000 [00:00<?, ?it/s]
  1%|          | 64/10000 [00:00<00:15, 632.97it/s]
  1%|▏         | 129/10000 [00:00<00:15, 638.01it/s]
  2%|▏         | 194/10000 [00:00<00:15, 639.97it/s]
  3%|▎         | 259/10000 [00:00<00:15, 643.19it/s]
  3%|▎         | 328/10000 [00:00<00:14, 658.81it/s]
  4%|▍         | 397/10000 [00:00<00:14, 667.31it/s]
  5%|▍         | 464/10000 [00:00<00:14, 667.96it/s]
  5%|▌         | 532/10000 [00:00<00:14, 670.66it/s]
  6%|▌         | 600/10000 [00:00<00:13, 673.35it/s]
  7%|▋         | 669/10000 [00:01<00:13, 678.40it/s]
  7%|▋         | 738/10000 [00:01<00:13, 681.86it/s]
  8%|▊         | 808/10000 [00:01<00:13, 684.60it/s]
  9%|▉         | 878/10000 [00:01<00:13, 689.03it/s]
 10%|▉         | 950/10000 [00:01<00:12, 697.86it/s]
 10%|█         | 1022/10000 [00:01<00:12, 704.47it/s]
 11%|█         | 1094/10000 [00:01<00:12, 707.94it/s]
 12%|█▏        | 1169/10000 [00:01<00:12, 720.34it/s]
 12%|█▏        | 1245/10000 [00:01<00:12, 729.34it/s]
 13%|█▎        | 1320/10000 [00:01<00:11, 734.95it/s]
 14%|█▍        | 1396/10000 [00:02<00:11, 741.03it/s]
 15%|█▍        | 1474/10000 [00:02<00:11, 751.05it/s]
 16%|█▌        | 1552/10000 [00:02<00:11, 758.33it/s]
 16%|█▋        | 1630/10000 [00:02<00:10, 764.55it/s]
 17%|█▋        | 1709/10000 [00:02<00:10, 770.46it/s]
 18%|█▊        | 1789/10000 [00:02<00:10, 777.10it/s]
 19%|█▊        | 1869/10000 [00:02<00:10, 782.25it/s]
 19%|█▉        | 1949/10000 [00:02<00:10, 785.43it/s]
 20%|██        | 2029/10000 [00:02<00:10, 788.18it/s]
 21%|██        | 2109/10000 [00:02<00:09, 790.01it/s]
 22%|██▏       | 2189/10000 [00:03<00:09, 791.48it/s]
 23%|██▎       | 2272/10000 [00:03<00:09, 800.83it/s]
 24%|██▎       | 2353/10000 [00:03<00:09, 798.64it/s]
 24%|██▍       | 2433/10000 [00:03<00:09, 796.70it/s]
 25%|██▌       | 2513/10000 [00:03<00:09, 795.34it/s]
 26%|██▌       | 2593/10000 [00:03<00:09, 794.86it/s]
 27%|██▋       | 2673/10000 [00:03<00:09, 792.32it/s]
 28%|██▊       | 2753/10000 [00:03<00:09, 789.35it/s]
 28%|██▊       | 2832/10000 [00:03<00:09, 787.79it/s]
 29%|██▉       | 2911/10000 [00:03<00:09, 787.32it/s]
 30%|██▉       | 2990/10000 [00:04<00:08, 786.53it/s]
 31%|███       | 3069/10000 [00:04<00:08, 785.57it/s]
 31%|███▏      | 3148/10000 [00:04<00:08, 785.47it/s]
 32%|███▏      | 3227/10000 [00:04<00:08, 785.83it/s]
 33%|███▎      | 3307/10000 [00:04<00:08, 788.29it/s]
 34%|███▍      | 3387/10000 [00:04<00:08, 790.66it/s]
 35%|███▍      | 3467/10000 [00:04<00:08, 791.42it/s]
 35%|███▌      | 3547/10000 [00:04<00:08, 791.32it/s]
 36%|███▋      | 3627/10000 [00:04<00:08, 791.51it/s]
 37%|███▋      | 3707/10000 [00:04<00:07, 792.26it/s]
 38%|███▊      | 3787/10000 [00:05<00:07, 792.88it/s]
 39%|███▊      | 3867/10000 [00:05<00:07, 792.60it/s]
 39%|███▉      | 3947/10000 [00:05<00:07, 792.81it/s]
 40%|████      | 4027/10000 [00:05<00:07, 790.98it/s]
 41%|████      | 4107/10000 [00:05<00:07, 784.69it/s]
 42%|████▏     | 4186/10000 [00:05<00:07, 776.60it/s]
 43%|████▎     | 4264/10000 [00:05<00:07, 771.52it/s]
 43%|████▎     | 4342/10000 [00:05<00:07, 767.32it/s]
 44%|████▍     | 4419/10000 [00:05<00:07, 763.86it/s]
 45%|████▍     | 4496/10000 [00:05<00:07, 761.15it/s]
 46%|████▌     | 4573/10000 [00:06<00:07, 759.34it/s]
 46%|████▋     | 4649/10000 [00:06<00:07, 757.35it/s]
 47%|████▋     | 4725/10000 [00:06<00:06, 756.80it/s]
 48%|████▊     | 4801/10000 [00:06<00:06, 757.45it/s]
 49%|████▉     | 4878/10000 [00:06<00:06, 758.77it/s]
 50%|████▉     | 4954/10000 [00:06<00:06, 756.34it/s]
 50%|█████     | 5030/10000 [00:06<00:06, 749.56it/s]
 51%|█████     | 5105/10000 [00:06<00:06, 743.93it/s]
 52%|█████▏    | 5180/10000 [00:06<00:06, 737.49it/s]
 53%|█████▎    | 5254/10000 [00:06<00:06, 735.69it/s]
 53%|█████▎    | 5328/10000 [00:07<00:06, 733.69it/s]
 54%|█████▍    | 5402/10000 [00:07<00:06, 731.39it/s]
 55%|█████▍    | 5476/10000 [00:07<00:06, 730.43it/s]
 56%|█████▌    | 5550/10000 [00:07<00:06, 722.83it/s]
 56%|█████▌    | 5623/10000 [00:07<00:06, 722.00it/s]
 57%|█████▋    | 5696/10000 [00:07<00:05, 719.27it/s]
 58%|█████▊    | 5769/10000 [00:07<00:05, 720.28it/s]
 58%|█████▊    | 5842/10000 [00:07<00:05, 721.70it/s]
 59%|█████▉    | 5915/10000 [00:07<00:05, 719.05it/s]
 60%|█████▉    | 5987/10000 [00:07<00:05, 718.68it/s]
 61%|██████    | 6060/10000 [00:08<00:05, 719.25it/s]
 61%|██████▏   | 6133/10000 [00:08<00:05, 721.58it/s]
 62%|██████▏   | 6206/10000 [00:08<00:05, 719.98it/s]
 63%|██████▎   | 6279/10000 [00:08<00:05, 712.39it/s]
 64%|██████▎   | 6351/10000 [00:08<00:05, 706.61it/s]
 64%|██████▍   | 6422/10000 [00:08<00:05, 703.36it/s]
 65%|██████▍   | 6493/10000 [00:08<00:05, 700.54it/s]
 66%|██████▌   | 6564/10000 [00:08<00:04, 698.52it/s]
 66%|██████▋   | 6634/10000 [00:08<00:04, 697.65it/s]
 67%|██████▋   | 6704/10000 [00:09<00:04, 695.45it/s]
 68%|██████▊   | 6774/10000 [00:09<00:04, 694.99it/s]
 68%|██████▊   | 6844/10000 [00:09<00:04, 695.26it/s]
 69%|██████▉   | 6914/10000 [00:09<00:04, 696.15it/s]
 70%|██████▉   | 6984/10000 [00:09<00:04, 696.43it/s]
 71%|███████   | 7054/10000 [00:09<00:04, 696.12it/s]
 71%|███████   | 7124/10000 [00:09<00:04, 695.36it/s]
 72%|███████▏  | 7194/10000 [00:09<00:04, 696.17it/s]
 73%|███████▎  | 7264/10000 [00:09<00:03, 690.22it/s]
 73%|███████▎  | 7334/10000 [00:09<00:03, 691.18it/s]
 74%|███████▍  | 7404/10000 [00:10<00:03, 685.03it/s]
 75%|███████▍  | 7473/10000 [00:10<00:03, 656.02it/s]
 75%|███████▌  | 7541/10000 [00:10<00:03, 661.48it/s]
 76%|███████▌  | 7608/10000 [00:10<00:03, 638.24it/s]
 77%|███████▋  | 7673/10000 [00:10<00:03, 637.20it/s]
 77%|███████▋  | 7737/10000 [00:10<00:03, 623.96it/s]
 78%|███████▊  | 7800/10000 [00:10<00:03, 607.50it/s]
 79%|███████▊  | 7861/10000 [00:10<00:03, 593.43it/s]
 79%|███████▉  | 7921/10000 [00:10<00:03, 585.30it/s]
 80%|███████▉  | 7980/10000 [00:11<00:03, 560.01it/s]
 80%|████████  | 8037/10000 [00:11<00:03, 541.90it/s]
 81%|████████  | 8092/10000 [00:11<00:03, 527.03it/s]
 81%|████████▏ | 8145/10000 [00:11<00:03, 518.72it/s]
 82%|████████▏ | 8197/10000 [00:11<00:03, 513.26it/s]
 82%|████████▏ | 8249/10000 [00:11<00:03, 509.88it/s]
 83%|████████▎ | 8300/10000 [00:11<00:03, 506.34it/s]
 84%|████████▎ | 8351/10000 [00:11<00:03, 502.82it/s]
 84%|████████▍ | 8402/10000 [00:11<00:03, 502.45it/s]
 85%|████████▍ | 8453/10000 [00:11<00:03, 502.76it/s]
 85%|████████▌ | 8504/10000 [00:12<00:02, 503.30it/s]
 86%|████████▌ | 8555/10000 [00:12<00:02, 502.97it/s]
 86%|████████▌ | 8606/10000 [00:12<00:02, 503.41it/s]
 87%|████████▋ | 8657/10000 [00:12<00:02, 503.89it/s]
 87%|████████▋ | 8708/10000 [00:12<00:02, 504.47it/s]
 88%|████████▊ | 8759/10000 [00:12<00:02, 505.28it/s]
 88%|████████▊ | 8810/10000 [00:12<00:02, 505.78it/s]
 89%|████████▊ | 8861/10000 [00:12<00:02, 505.49it/s]
 89%|████████▉ | 8912/10000 [00:12<00:02, 505.12it/s]
 90%|████████▉ | 8963/10000 [00:12<00:02, 506.01it/s]
 90%|█████████ | 9018/10000 [00:13<00:01, 518.06it/s]
 91%|█████████ | 9076/10000 [00:13<00:01, 533.89it/s]
 91%|█████████▏| 9133/10000 [00:13<00:01, 544.07it/s]
 92%|█████████▏| 9190/10000 [00:13<00:01, 551.75it/s]
 92%|█████████▏| 9247/10000 [00:13<00:01, 556.29it/s]
 93%|█████████▎| 9304/10000 [00:13<00:01, 560.27it/s]
 94%|█████████▎| 9361/10000 [00:13<00:01, 563.04it/s]
 94%|█████████▍| 9419/10000 [00:13<00:01, 565.27it/s]
 95%|█████████▍| 9476/10000 [00:13<00:00, 563.36it/s]
 95%|█████████▌| 9534/10000 [00:13<00:00, 565.54it/s]
 96%|█████████▌| 9594/10000 [00:14<00:00, 574.82it/s]
 97%|█████████▋| 9656/10000 [00:14<00:00, 587.83it/s]
 97%|█████████▋| 9717/10000 [00:14<00:00, 591.52it/s]
 98%|█████████▊| 9777/10000 [00:14<00:00, 591.75it/s]
 98%|█████████▊| 9838/10000 [00:14<00:00, 595.42it/s]
 99%|█████████▉| 9899/10000 [00:14<00:00, 598.79it/s]
100%|█████████▉| 9960/10000 [00:14<00:00, 599.99it/s]
100%|██████████| 10000/10000 [00:15<00:00, 633.04it/s]
Define new variables

Defining new variables...:   0%|          | 0/20 [00:00<?, ?it/s]
Defining new variables...:  10%|█         | 2/20 [00:00<00:01, 10.94it/s]
Defining new variables...:  20%|██        | 4/20 [00:00<00:01, 11.63it/s]
Defining new variables...:  30%|███       | 6/20 [00:00<00:01, 11.85it/s]
Defining new variables...:  40%|████      | 8/20 [00:00<00:00, 12.04it/s]
Defining new variables...:  50%|█████     | 10/20 [00:00<00:00, 12.05it/s]
Defining new variables...:  60%|██████    | 12/20 [00:01<00:00, 12.13it/s]
Defining new variables...:  70%|███████   | 14/20 [00:01<00:00, 12.23it/s]
Defining new variables...:  80%|████████  | 16/20 [00:01<00:00, 12.19it/s]
Defining new variables...:  90%|█████████ | 18/20 [00:01<00:00, 12.16it/s]
Defining new variables...: 100%|██████████| 20/20 [00:01<00:00, 12.21it/s]
Defining new variables...: 100%|██████████| 20/20 [00:04<00:00,  4.54it/s]
File nested_downtown_20.dat has been created.

Definition of the nest.

# Nest parameter. Following the Biogeme `Beta` signature, the arguments are:
# name, starting value (1.0), lower bound (1.0), upper bound (None, i.e. no
# upper bound) and status (0, i.e. the parameter is estimated).
mu_asian = Beta('mu_asian', 1.0, 1.0, None, 0)
# A single nest, containing the alternatives listed in `asian`.
nest_asian = OneNestForNestedLogit(
    nest_param=mu_asian, list_of_alternatives=asian, name='asian'
)
# Alternatives of the choice set that are not assigned to any nest are treated
# by Biogeme as being alone in their own nest (see the warning printed below).
nests = NestsForNestedLogit(
    choice_set=all_alternatives,
    tuple_of_nests=(nest_asian,),
)
The following elements do not appear in any nest and are assumed each to be alone in a separate nest: {2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 19, 20, 21, 22, 23, 24, 25, 26, 28, 29, 30, 32, 35, 36, 38, 39, 41, 42, 43, 44, 46, 48, 49, 52, 53, 54, 56, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 69, 71, 73, 74, 75, 77, 82, 83, 84, 85, 86, 88, 90, 93, 95, 96, 97, 99}. If it is not the intention, check the assignment of alternatives to nests.
# Expression of the nested logit model with sampling of alternatives,
# generated from the sampling context for the nests defined above.
log_probability = the_model_generation.get_nested_logit(nests)
# Biogeme object combining the sampled database and the model expression.
the_biogeme = BIOGEME(biogeme_database, log_probability)
# The model name determines the names of the files generated by Biogeme
# (e.g. nested_downtown_20~00.html in the output below).
the_biogeme.model_name = MODEL_NAME
Biogeme parameters read from biogeme.toml.

Calculate the null log likelihood for reporting.

# Map every index of the sampled choice set (0 .. total_sample_size - 1) to 1.
# NOTE(review): the mapping is presumably the availability of each sampled
# alternative -- confirm against the Biogeme documentation.
the_biogeme.calculate_null_loglikelihood(
    dict.fromkeys(range(context.total_sample_size), 1)
)
-29957.32273553991

Estimate the parameters.

# Run the estimation.
# NOTE(review): recycle=False presumably forces a new estimation instead of
# reading previously saved results -- confirm. The log below shows that the
# starting values are nevertheless restored from the .iter file.
results = the_biogeme.estimate(recycle=False)
*** Initial values of the parameters are obtained from the file __nested_downtown_20.iter
Parameter values restored from __nested_downtown_20.iter
Starting values for the algorithm: {'beta_rating': 0.7635988967835228, 'beta_price': -0.40544498815931296, 'beta_chinese': 0.6989222156872917, 'beta_japanese': 1.2593148143727921, 'beta_korean': 0.7066291311488135, 'beta_indian': 0.9728385421122032, 'beta_french': 0.7361486098375662, 'beta_mexican': 1.2261498478445416, 'beta_lebanese': 0.7246008469144617, 'beta_ethiopian': 0.5074448770679202, 'beta_log_dist': -0.5888436340047349, 'mu_asian': 2.0231709645090556}
As the model is not too complex, we activate the calculation of second derivatives. To change this behavior, modify the algorithm to "simple_bounds" in the TOML file.
Optimization algorithm: hybrid Newton/BFGS with simple bounds [simple_bounds]
** Optimization: Newton with trust region for simple bounds
Optimization algorithm has converged.
Relative gradient: 2.2935645888325624e-09
Cause of termination: Relative gradient = 2.3e-09 <= 6.1e-06
Number of function evaluations: 1
Number of gradient evaluations: 1
Number of hessian evaluations: 0
Algorithm: Newton with trust region for simple bound constraints
Number of iterations: 0
Optimization time: 0:02:04.796532
Calculate second derivatives and BHHH
File nested_downtown_20~00.html has been generated.
File nested_downtown_20~00.yaml has been generated.
# Print the general statistics of the estimation (log likelihoods, rho square,
# information criteria).
print(results.short_summary())
Results for model nested_downtown_20
Nbr of parameters:              12
Sample size:                    10000
Excluded data:                  0
Null log likelihood:            -29957.32
Final log likelihood:           -22943.69
Likelihood ratio test (null):           14027.27
Rho square (null):                      0.234
Rho bar square (null):                  0.234
Akaike Information Criterion:   45911.37
Bayesian Information Criterion: 45997.9
# Table with the estimated values of the parameters and their robust
# statistics.
estimated_parameters = get_pandas_estimated_parameters(estimation_results=results)
display(estimated_parameters)
              Name     Value  Robust std err.  Robust t-stat.  Robust p-value
0      beta_rating  0.763599         0.015212       50.197105             0.0
1       beta_price -0.405445         0.012309      -32.938544             0.0
2     beta_chinese  0.698922         0.070766        9.876539             0.0
3    beta_japanese  1.259315         0.054086       23.283473             0.0
4      beta_korean  0.706629         0.061496       11.490687             0.0
5      beta_indian  0.972839         0.063578       15.301574             0.0
6      beta_french  0.736149         0.049137       14.981582             0.0
7     beta_mexican  1.226150         0.029064       42.187328             0.0
8    beta_lebanese  0.724601         0.049785       14.554668             0.0
9   beta_ethiopian  0.507445         0.040162       12.634854             0.0
10   beta_log_dist -0.588844         0.012760      -46.149220             0.0
11        mu_asian  2.023171         0.058728       34.449863             0.0
# Compare the estimates with the true values of the parameters. `compare`
# returns a table and a message.
df, msg = compare(estimated_parameters)
print(df)
              Name  True Value  Estimated Value    T-Test
0      beta_rating        0.75         0.763599 -0.893958
1       beta_price       -0.40        -0.405445  0.442353
2     beta_chinese        0.75         0.698922  0.721785
3    beta_japanese        1.25         1.259315 -0.172222
4      beta_korean        0.75         0.706629  0.705265
5      beta_indian        1.00         0.972839  0.427217
6      beta_french        0.75         0.736149  0.281894
7     beta_mexican        1.25         1.226150  0.820596
8    beta_lebanese        0.75         0.724601  0.510179
9   beta_ethiopian        0.50         0.507445 -0.185370
10   beta_log_dist       -0.60        -0.588844 -0.874354
11        mu_asian        2.00         2.023171 -0.394547
# The message lists the parameters that have not been estimated by this model.
print(msg)
Parameters not estimated: ['mu_downtown']

Total running time of the script: (14 minutes 12.065 seconds)

Gallery generated by Sphinx-Gallery