Note
Go to the end to download the full example code.
Nested logit
Estimation of a nested logit model using sampling of alternatives.
Michel Bierlaire Sat Jul 26 2025, 13:01:22
import pandas as pd
from IPython.core.display_functions import display
import biogeme.biogeme_logging as blog
from alternatives import ID_COLUMN, all_alternatives, alternatives, asian, partitions
from biogeme.biogeme import BIOGEME
from biogeme.expressions import Beta
from biogeme.nests import NestsForNestedLogit, OneNestForNestedLogit
from biogeme.results_processing import get_pandas_estimated_parameters
from biogeme.sampling_of_alternatives import (
ChoiceSetsGeneration,
GenerateModel,
SamplingContext,
generate_segment_size,
)
from compare import compare
from specification_sampling import V, combined_variables
# Report progress and diagnostics on the screen at INFO level.
logger = blog.get_screen_logger(level=blog.INFO)

# Number of alternatives sampled for the main choice set and for the MEV terms.
SAMPLE_SIZE = 20  # out of 100
SAMPLE_SIZE_MEV = 33  # out of 33

# Column name passed to the sampling context as the choice column.
CHOICE_COLUMN = 'nested_0'

# Keys of the entries of `partitions` used for the main sample and for the
# MEV sample, respectively.
PARTITION = 'downtown'
MEV_PARTITION = 'uniform_asian'

# The model name, and the data file derived from it, encode the partition and
# the sample size so that different configurations do not overwrite each other.
MODEL_NAME = f'nested_{PARTITION}_{SAMPLE_SIZE}'
FILE_NAME = f'{MODEL_NAME}.dat'

# Partition used to sample the main choice set. Fail early with an explicit
# message if the requested key is not defined.
the_partition = partitions.get(PARTITION)
if the_partition is None:
    raise ValueError(f'Unknown partition: {PARTITION}')

# Split the total sample size across the segments of the partition.
segment_sizes = generate_segment_size(SAMPLE_SIZE, the_partition.number_of_segments())
For the MEV terms, we use all alternatives in the nest (no sampling: 33 out of 33).
# Partition used to select the alternatives entering the MEV terms. Fail early
# with an explicit message if the requested key is not defined.
mev_partition = partitions.get(MEV_PARTITION)
if mev_partition is None:
    raise ValueError(f'Unknown partition: {MEV_PARTITION}')

# One sample size per segment of the MEV partition; a single entry is supplied
# here, equal to the full MEV sample size.
mev_segment_sizes = [SAMPLE_SIZE_MEV]
# Read the observations from the data file.
observations = pd.read_csv('obs_choice.dat')

# Gather everything the sampling procedure needs in a single object.
context = SamplingContext(
    # Observations and the column identifying the choice.
    individuals=observations,
    choice_column=CHOICE_COLUMN,
    # Alternatives and the column identifying each of them.
    alternatives=alternatives,
    id_column=ID_COLUMN,
    # Sampling of the main choice set.
    the_partition=the_partition,
    sample_sizes=segment_sizes,
    # Sampling of the alternatives used for the MEV terms.
    mev_partition=mev_partition,
    mev_sample_sizes=mev_segment_sizes,
    # Model specification and name of the generated data file.
    utility_function=V,
    combined_variables=combined_variables,
    biogeme_file_name=FILE_NAME,
)

# Log a summary of the sampling configuration (choice set, partitions, samples).
logger.info(context.reporting())
Size of the choice set: 100
Main partition: 2 segment(s) of size 46, 54
Main sample: 20: 10/46, 10/54
Nbr of MEV alternatives: 33
MEV partition: 1 segment(s) of size 33
MEV sample: 33: 33/33
# Object in charge of generating the sampled choice sets from the context.
the_data_generation = ChoiceSetsGeneration(context=context)
# Object in charge of generating the model expressions from the same context.
the_model_generation = GenerateModel(context=context)
# Sample the alternatives for each observation and merge the result into a
# database for Biogeme.
# NOTE(review): recycle=False presumably forces a fresh sample instead of
# reusing an existing data file -- confirm against the Biogeme documentation.
biogeme_database = the_data_generation.sample_and_merge(recycle=False)
Generating 20 + 33 alternatives for 10000 observations
0%| | 0/10000 [00:00<?, ?it/s]
1%| | 81/10000 [00:00<00:12, 805.47it/s]
2%|▏ | 163/10000 [00:00<00:12, 807.42it/s]
2%|▏ | 247/10000 [00:00<00:11, 818.07it/s]
3%|▎ | 330/10000 [00:00<00:11, 822.27it/s]
4%|▍ | 413/10000 [00:00<00:11, 819.55it/s]
5%|▍ | 497/10000 [00:00<00:11, 824.42it/s]
6%|▌ | 581/10000 [00:00<00:11, 829.42it/s]
7%|▋ | 664/10000 [00:00<00:11, 823.70it/s]
7%|▋ | 748/10000 [00:00<00:11, 827.71it/s]
8%|▊ | 831/10000 [00:01<00:11, 825.62it/s]
9%|▉ | 914/10000 [00:01<00:11, 822.92it/s]
10%|▉ | 997/10000 [00:01<00:10, 822.89it/s]
11%|█ | 1080/10000 [00:01<00:10, 821.81it/s]
12%|█▏ | 1168/10000 [00:01<00:10, 837.25it/s]
13%|█▎ | 1252/10000 [00:01<00:10, 833.66it/s]
13%|█▎ | 1336/10000 [00:01<00:10, 830.67it/s]
14%|█▍ | 1420/10000 [00:01<00:10, 828.48it/s]
15%|█▌ | 1503/10000 [00:01<00:10, 824.40it/s]
16%|█▌ | 1589/10000 [00:01<00:10, 833.16it/s]
17%|█▋ | 1673/10000 [00:02<00:10, 825.09it/s]
18%|█▊ | 1756/10000 [00:02<00:10, 822.74it/s]
18%|█▊ | 1839/10000 [00:02<00:09, 819.38it/s]
19%|█▉ | 1922/10000 [00:02<00:09, 821.75it/s]
20%|██ | 2008/10000 [00:02<00:09, 832.20it/s]
21%|██ | 2092/10000 [00:02<00:09, 826.94it/s]
22%|██▏ | 2176/10000 [00:02<00:09, 829.99it/s]
23%|██▎ | 2260/10000 [00:02<00:09, 827.86it/s]
23%|██▎ | 2343/10000 [00:02<00:09, 827.32it/s]
24%|██▍ | 2429/10000 [00:02<00:09, 834.59it/s]
25%|██▌ | 2513/10000 [00:03<00:08, 833.53it/s]
26%|██▌ | 2597/10000 [00:03<00:08, 834.25it/s]
27%|██▋ | 2681/10000 [00:03<00:08, 827.96it/s]
28%|██▊ | 2764/10000 [00:03<00:08, 824.97it/s]
28%|██▊ | 2848/10000 [00:03<00:08, 827.32it/s]
29%|██▉ | 2931/10000 [00:03<00:08, 826.58it/s]
30%|███ | 3014/10000 [00:03<00:08, 819.10it/s]
31%|███ | 3096/10000 [00:03<00:08, 818.76it/s]
32%|███▏ | 3178/10000 [00:03<00:08, 817.95it/s]
33%|███▎ | 3262/10000 [00:03<00:08, 823.12it/s]
33%|███▎ | 3346/10000 [00:04<00:08, 827.76it/s]
34%|███▍ | 3429/10000 [00:04<00:07, 827.88it/s]
35%|███▌ | 3512/10000 [00:04<00:07, 822.43it/s]
36%|███▌ | 3596/10000 [00:04<00:07, 826.15it/s]
37%|███▋ | 3680/10000 [00:04<00:07, 828.84it/s]
38%|███▊ | 3765/10000 [00:04<00:07, 832.63it/s]
38%|███▊ | 3849/10000 [00:04<00:07, 831.00it/s]
39%|███▉ | 3933/10000 [00:04<00:07, 832.79it/s]
40%|████ | 4017/10000 [00:04<00:07, 831.01it/s]
41%|████ | 4101/10000 [00:04<00:07, 831.78it/s]
42%|████▏ | 4186/10000 [00:05<00:06, 835.95it/s]
43%|████▎ | 4270/10000 [00:05<00:06, 832.13it/s]
44%|████▎ | 4354/10000 [00:05<00:06, 827.04it/s]
44%|████▍ | 4437/10000 [00:05<00:06, 825.98it/s]
45%|████▌ | 4520/10000 [00:05<00:06, 826.76it/s]
46%|████▌ | 4606/10000 [00:05<00:06, 835.66it/s]
47%|████▋ | 4692/10000 [00:05<00:06, 840.42it/s]
48%|████▊ | 4777/10000 [00:05<00:06, 838.36it/s]
49%|████▊ | 4861/10000 [00:05<00:06, 836.04it/s]
49%|████▉ | 4945/10000 [00:05<00:06, 831.22it/s]
50%|█████ | 5033/10000 [00:06<00:05, 845.33it/s]
51%|█████ | 5122/10000 [00:06<00:05, 856.13it/s]
52%|█████▏ | 5211/10000 [00:06<00:05, 864.10it/s]
53%|█████▎ | 5300/10000 [00:06<00:05, 869.84it/s]
54%|█████▍ | 5389/10000 [00:06<00:05, 873.33it/s]
55%|█████▍ | 5477/10000 [00:06<00:05, 869.23it/s]
56%|█████▌ | 5564/10000 [00:06<00:05, 839.79it/s]
56%|█████▋ | 5649/10000 [00:06<00:05, 829.17it/s]
57%|█████▋ | 5733/10000 [00:06<00:05, 815.31it/s]
58%|█████▊ | 5815/10000 [00:07<00:05, 810.23it/s]
59%|█████▉ | 5898/10000 [00:07<00:05, 815.00it/s]
60%|█████▉ | 5980/10000 [00:07<00:04, 815.34it/s]
61%|██████ | 6062/10000 [00:07<00:04, 809.97it/s]
61%|██████▏ | 6145/10000 [00:07<00:04, 814.88it/s]
62%|██████▏ | 6227/10000 [00:07<00:04, 811.72it/s]
63%|██████▎ | 6310/10000 [00:07<00:04, 814.65it/s]
64%|██████▍ | 6395/10000 [00:07<00:04, 824.37it/s]
65%|██████▍ | 6478/10000 [00:07<00:04, 820.22it/s]
66%|██████▌ | 6561/10000 [00:07<00:04, 820.45it/s]
66%|██████▋ | 6645/10000 [00:08<00:04, 823.88it/s]
67%|██████▋ | 6728/10000 [00:08<00:03, 822.21it/s]
68%|██████▊ | 6813/10000 [00:08<00:03, 828.92it/s]
69%|██████▉ | 6896/10000 [00:08<00:03, 823.93it/s]
70%|██████▉ | 6979/10000 [00:08<00:03, 821.30it/s]
71%|███████ | 7062/10000 [00:08<00:03, 816.39it/s]
71%|███████▏ | 7145/10000 [00:08<00:03, 818.72it/s]
72%|███████▏ | 7231/10000 [00:08<00:03, 830.89it/s]
73%|███████▎ | 7315/10000 [00:08<00:03, 828.61it/s]
74%|███████▍ | 7398/10000 [00:08<00:03, 827.78it/s]
75%|███████▍ | 7481/10000 [00:09<00:03, 828.40it/s]
76%|███████▌ | 7564/10000 [00:09<00:02, 825.99it/s]
76%|███████▋ | 7650/10000 [00:09<00:02, 835.37it/s]
77%|███████▋ | 7734/10000 [00:09<00:02, 835.32it/s]
78%|███████▊ | 7818/10000 [00:09<00:02, 833.44it/s]
79%|███████▉ | 7902/10000 [00:09<00:02, 828.86it/s]
80%|███████▉ | 7985/10000 [00:09<00:02, 825.35it/s]
81%|████████ | 8068/10000 [00:09<00:02, 826.45it/s]
82%|████████▏ | 8153/10000 [00:09<00:02, 831.30it/s]
82%|████████▏ | 8237/10000 [00:09<00:02, 828.46it/s]
83%|████████▎ | 8320/10000 [00:10<00:02, 826.20it/s]
84%|████████▍ | 8403/10000 [00:10<00:01, 824.08it/s]
85%|████████▍ | 8486/10000 [00:10<00:01, 823.84it/s]
86%|████████▌ | 8571/10000 [00:10<00:01, 831.26it/s]
87%|████████▋ | 8655/10000 [00:10<00:01, 830.20it/s]
87%|████████▋ | 8739/10000 [00:10<00:01, 831.95it/s]
88%|████████▊ | 8824/10000 [00:10<00:01, 835.07it/s]
89%|████████▉ | 8910/10000 [00:10<00:01, 840.10it/s]
90%|████████▉ | 8996/10000 [00:10<00:01, 845.79it/s]
91%|█████████ | 9081/10000 [00:10<00:01, 839.86it/s]
92%|█████████▏| 9165/10000 [00:11<00:01, 833.52it/s]
92%|█████████▏| 9249/10000 [00:11<00:00, 830.34it/s]
93%|█████████▎| 9333/10000 [00:11<00:00, 823.93it/s]
94%|█████████▍| 9419/10000 [00:11<00:00, 833.50it/s]
95%|█████████▌| 9503/10000 [00:11<00:00, 827.78it/s]
96%|█████████▌| 9587/10000 [00:11<00:00, 829.78it/s]
97%|█████████▋| 9670/10000 [00:11<00:00, 827.85it/s]
98%|█████████▊| 9755/10000 [00:11<00:00, 832.94it/s]
98%|█████████▊| 9844/10000 [00:11<00:00, 847.40it/s]
99%|█████████▉| 9933/10000 [00:11<00:00, 858.06it/s]
100%|██████████| 10000/10000 [00:12<00:00, 777.57it/s]
Define new variables
Defining new variables...: 0%| | 0/20 [00:00<?, ?it/s]
Defining new variables...: 10%|█ | 2/20 [00:00<00:01, 14.78it/s]
Defining new variables...: 20%|██ | 4/20 [00:00<00:01, 14.74it/s]
Defining new variables...: 30%|███ | 6/20 [00:00<00:00, 15.12it/s]
Defining new variables...: 40%|████ | 8/20 [00:00<00:00, 15.26it/s]
Defining new variables...: 50%|█████ | 10/20 [00:00<00:00, 15.26it/s]
Defining new variables...: 60%|██████ | 12/20 [00:00<00:00, 15.40it/s]
Defining new variables...: 70%|███████ | 14/20 [00:00<00:00, 15.60it/s]
Defining new variables...: 80%|████████ | 16/20 [00:01<00:00, 15.68it/s]
Defining new variables...: 90%|█████████ | 18/20 [00:01<00:00, 15.83it/s]
Defining new variables...: 100%|██████████| 20/20 [00:01<00:00, 15.86it/s]
Defining new variables...: 100%|██████████| 20/20 [00:03<00:00, 5.87it/s]
File nested_downtown_20.dat has been created.
Definition of the nest.
# Nest parameter to be estimated. Positional arguments follow Biogeme's
# Beta(name, value, lowerbound, upperbound, status): starting value 1.0,
# lower bound 1.0, no upper bound, status 0.
mu_asian = Beta('mu_asian', 1.0, 1.0, None, 0)

# The only nest defined explicitly: the alternatives listed in `asian`.
nest_asian = OneNestForNestedLogit(
    name='asian',
    nest_param=mu_asian,
    list_of_alternatives=asian,
)

# Nesting structure over the full choice set, built from the single nest above.
nests = NestsForNestedLogit(
    tuple_of_nests=(nest_asian,),
    choice_set=all_alternatives,
)
The following elements do not appear in any nest and are assumed each to be alone in a separate nest: {2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 19, 20, 21, 22, 23, 24, 25, 26, 28, 29, 30, 32, 35, 36, 38, 39, 41, 42, 43, 44, 46, 48, 49, 52, 53, 54, 56, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 69, 71, 73, 74, 75, 77, 82, 83, 84, 85, 86, 88, 90, 93, 95, 96, 97, 99}. If it is not the intention, check the assignment of alternatives to nests.
# Expression of the nested logit model for the nesting structure `nests`,
# generated from the sampling context.
log_probability = the_model_generation.get_nested_logit(nests)
# Estimation object combining the sampled database and the model expression.
the_biogeme = BIOGEME(biogeme_database, log_probability)
# The model name is used to name the generated output files (.html, .yaml).
the_biogeme.model_name = MODEL_NAME
Biogeme parameters read from biogeme.toml.
Calculate the null log likelihood for reporting.
# Every sampled alternative (indexed 0 .. total_sample_size - 1) is flagged
# with 1 in the dictionary passed to the null log likelihood calculation.
the_biogeme.calculate_null_loglikelihood(
    dict.fromkeys(range(context.total_sample_size), 1)
)
-29957.32273553991
Estimate the parameters
# Run the estimation and keep the results object for reporting below.
# NOTE(review): recycle=False presumably forces a new estimation rather than
# reloading previously saved results -- confirm against the Biogeme docs.
results = the_biogeme.estimate(recycle=False)
*** Initial values of the parameters are obtained from the file __nested_downtown_20.iter
Cannot read file __nested_downtown_20.iter. Statement is ignored.
Starting values for the algorithm: {}
As the model is not too complex, we activate the calculation of second derivatives. To change this behavior, modify the algorithm to "simple_bounds" in the TOML file.
Optimization algorithm: hybrid Newton/BFGS with simple bounds [simple_bounds]
** Optimization: Newton with trust region for simple bounds
Iter. beta_rating beta_price beta_chinese beta_japanese beta_korean beta_indian beta_french beta_mexican beta_lebanese beta_ethiopian beta_log_dist mu_asian Function Relgrad Radius Rho
0 0.46 -0.51 -0.097 0.12 -0.067 -0.12 0.021 0.69 0.0051 -0.042 -1 1.2 2.4e+04 0.08 10 0.92 ++
1 0.76 -0.4 0.17 1 0.25 0.37 0.81 1.2 0.78 0.52 -0.61 1.4 2.3e+04 0.024 1e+02 1 ++
2 0.79 -0.42 0.52 1.1 0.57 0.83 0.75 1.2 0.73 0.51 -0.62 1.7 2.3e+04 0.013 1e+03 1.1 ++
3 0.77 -0.41 0.63 1.2 0.65 0.9 0.74 1.2 0.73 0.51 -0.6 1.9 2.3e+04 0.0033 1e+04 1.2 ++
4 0.76 -0.41 0.69 1.3 0.7 0.97 0.74 1.2 0.72 0.51 -0.59 2 2.3e+04 0.00044 1e+05 1.1 ++
5 0.76 -0.41 0.7 1.3 0.71 0.97 0.74 1.2 0.72 0.51 -0.59 2 2.3e+04 6.5e-06 1e+06 1 ++
6 0.76 -0.41 0.7 1.3 0.71 0.97 0.74 1.2 0.72 0.51 -0.59 2 2.3e+04 1.8e-09 1e+06 1 ++
Optimization algorithm has converged.
Relative gradient: 1.759218152502578e-09
Cause of termination: Relative gradient = 1.8e-09 <= 6.1e-06
Number of function evaluations: 22
Number of gradient evaluations: 15
Number of hessian evaluations: 7
Algorithm: Newton with trust region for simple bound constraints
Number of iterations: 7
Proportion of Hessian calculation: 7/7 = 100.0%
Optimization time: 0:04:20.096855
Calculate second derivatives and BHHH
File nested_downtown_20.html has been generated.
File nested_downtown_20.yaml has been generated.
# Print the condensed report: sample size, log likelihoods and fit statistics.
print(results.short_summary())
Results for model nested_downtown_20
Nbr of parameters: 12
Sample size: 10000
Excluded data: 0
Null log likelihood: -29957.32
Final log likelihood: -22943.69
Likelihood ratio test (null): 14027.27
Rho square (null): 0.234
Rho bar square (null): 0.234
Akaike Information Criterion: 45911.37
Bayesian Information Criterion: 45997.9
# Collect the estimated parameters (values and robust statistics) in a table
# and render it.
estimated_parameters = get_pandas_estimated_parameters(estimation_results=results)
display(estimated_parameters)
Name Value Robust std err. Robust t-stat. Robust p-value
0 beta_rating 0.763599 0.015212 50.197105 0.0
1 beta_price -0.405445 0.012309 -32.938544 0.0
2 beta_chinese 0.698922 0.070766 9.876539 0.0
3 beta_japanese 1.259315 0.054086 23.283473 0.0
4 beta_korean 0.706629 0.061496 11.490687 0.0
5 beta_indian 0.972839 0.063578 15.301574 0.0
6 beta_french 0.736149 0.049137 14.981582 0.0
7 beta_mexican 1.226150 0.029064 42.187328 0.0
8 beta_lebanese 0.724601 0.049785 14.554668 0.0
9 beta_ethiopian 0.507445 0.040162 12.634854 0.0
10 beta_log_dist -0.588844 0.012760 -46.149220 0.0
11 mu_asian 2.023171 0.058728 34.449863 0.0
# Compare the estimates with the true values of the parameters. `compare`
# returns a comparison table and an accompanying message.
df, msg = compare(estimated_parameters)
print(df)
Name True Value Estimated Value T-Test
0 beta_rating 0.75 0.763599 -0.893958
1 beta_price -0.40 -0.405445 0.442353
2 beta_chinese 0.75 0.698922 0.721785
3 beta_japanese 1.25 1.259315 -0.172222
4 beta_korean 0.75 0.706629 0.705265
5 beta_indian 1.00 0.972839 0.427217
6 beta_french 0.75 0.736149 0.281894
7 beta_mexican 1.25 1.226150 0.820596
8 beta_lebanese 0.75 0.724601 0.510179
9 beta_ethiopian 0.50 0.507445 -0.185370
10 beta_log_dist -0.60 -0.588844 -0.874354
11 mu_asian 2.00 2.023171 -0.394547
# Message returned by `compare`; here it lists the parameters that were not
# estimated by this model.
print(msg)
Parameters not estimated: ['mu_downtown']
Total running time of the script: (9 minutes 4.900 seconds)