Note
Go to the end to download the full example code.
Nested logit
Estimation of a nested logit model using sampling of alternatives.
Michel Bierlaire Sat Jul 26 2025, 13:01:22
import pandas as pd
from alternatives import ID_COLUMN, all_alternatives, alternatives, asian, partitions
from compare import compare
from IPython.core.display_functions import display
from specification_sampling import V, combined_variables
import biogeme.biogeme_logging as blog
from biogeme.biogeme import BIOGEME
from biogeme.expressions import Beta
from biogeme.nests import NestsForNestedLogit, OneNestForNestedLogit
from biogeme.results_processing import (
EstimationResults,
get_pandas_estimated_parameters,
)
from biogeme.sampling_of_alternatives import (
ChoiceSetsGeneration,
GenerateModel,
SamplingContext,
generate_segment_size,
)
from biogeme.tools import timeit
ID rating price ... rest_lon distance downtown
0 0 1 4 ... 42.220972 71.735518 1.0
1 1 2 2 ... 50.549434 106.267205 0.0
2 2 3 3 ... 97.830520 136.298409 0.0
3 3 4 1 ... 69.152206 85.941147 0.0
4 4 4 3 ... 89.145620 96.773021 0.0
.. .. ... ... ... ... ... ...
95 95 4 3 ... 9.511387 84.166441 0.0
96 96 1 1 ... 92.144641 95.601366 0.0
97 97 4 2 ... 27.657518 30.440555 1.0
98 98 4 4 ... 32.303213 45.027143 1.0
99 99 4 1 ... 13.672495 25.703295 1.0
[100 rows x 16 columns]
Number of asian restaurants: 33
# Screen logger configured at INFO level; it is used below to report the
# sampling context.
logger = blog.get_screen_logger(level=blog.INFO)
# --- Configuration of the example ---------------------------------------

# Names of the partitions looked up in `partitions`: one for the main sample,
# one for the sample used in the MEV terms.
PARTITION = 'downtown'
MEV_PARTITION = 'uniform_asian'

# Number of alternatives drawn for each observation.
SAMPLE_SIZE = 20  # out of 100
SAMPLE_SIZE_MEV = 33  # out of 33

# Column of the observation file that contains the chosen alternative.
CHOICE_COLUMN = 'nested_0'

# The model name encodes the partition and the sample size; the generated
# data file reuses it with a `.dat` extension.
MODEL_NAME = f'nested_{PARTITION}_{SAMPLE_SIZE}'
FILE_NAME = f'{MODEL_NAME}.dat'
# Partition of the choice set used to draw the main sample. `partitions.get`
# returns None for an unknown name, which is reported as a configuration
# error before any sampling starts.
the_partition = partitions.get(PARTITION)
if the_partition is None:
    raise ValueError(f'Unknown partition: {PARTITION}')

# Sample sizes for the segments of the partition, derived from the total
# sample size and the number of segments.
segment_sizes = generate_segment_size(
    SAMPLE_SIZE,
    the_partition.number_of_segments(),
)
For the MEV terms, we use all 33 alternatives in the Asian nest, so no sampling is involved there.
# Partition used for the MEV sample; an unknown name is a configuration
# error.
mev_partition = partitions.get(MEV_PARTITION)
if mev_partition is None:
    raise ValueError(f'Unknown partition: {MEV_PARTITION}')

# A single segment size: the whole MEV sample is taken from one segment.
mev_segment_sizes = [SAMPLE_SIZE_MEV]
# Observed choices, read from a CSV-formatted file.
observations = pd.read_csv('obs_choice.dat')

# Everything the sampling machinery needs, gathered in one object. All
# arguments are passed by keyword, so their order is only for readability.
context = SamplingContext(
    # Alternatives and observations.
    alternatives=alternatives,
    id_column=ID_COLUMN,
    individuals=observations,
    choice_column=CHOICE_COLUMN,
    # Main sample.
    the_partition=the_partition,
    sample_sizes=segment_sizes,
    # Sample for the MEV terms.
    mev_partition=mev_partition,
    mev_sample_sizes=mev_segment_sizes,
    # Model specification.
    utility_function=V,
    combined_variables=combined_variables,
    # Name of the generated data file.
    biogeme_file_name=FILE_NAME,
)

# Log a description of the sampling set-up.
logger.info(context.reporting())
Size of the choice set: 100
Main partition: 2 segment(s) of size 46, 54
Main sample: 20: 10/46, 10/54
Nbr of MEV alternatives: 33
MEV partition: 1 segment(s) of size 33
MEV sample: 33: 33/33
# The data generator and the model generator are both built from the same
# sampling context.
the_data_generation = ChoiceSetsGeneration(context=context)
the_model_generation = GenerateModel(context=context)
# Sample the alternatives for each observation and obtain the database that is
# passed to BIOGEME below.
# NOTE(review): recycle=False presumably forces a fresh sample rather than
# reusing a previously generated file -- confirm against the Biogeme docs.
biogeme_database = the_data_generation.sample_and_merge(recycle=False)
Generating 20 + 33 alternatives for 10000 observations
0%| | 0/10000 [00:00<?, ?it/s]
1%| | 81/10000 [00:00<00:12, 801.20it/s]
2%|▏ | 166/10000 [00:00<00:11, 825.88it/s]
2%|▏ | 249/10000 [00:00<00:11, 818.44it/s]
3%|▎ | 332/10000 [00:00<00:11, 821.13it/s]
4%|▍ | 418/10000 [00:00<00:11, 831.48it/s]
5%|▌ | 503/10000 [00:00<00:11, 835.37it/s]
6%|▌ | 589/10000 [00:00<00:11, 841.08it/s]
7%|▋ | 674/10000 [00:00<00:11, 841.65it/s]
8%|▊ | 759/10000 [00:00<00:10, 843.14it/s]
8%|▊ | 844/10000 [00:01<00:10, 844.35it/s]
9%|▉ | 929/10000 [00:01<00:10, 843.22it/s]
10%|█ | 1014/10000 [00:01<00:10, 841.61it/s]
11%|█ | 1099/10000 [00:01<00:10, 842.72it/s]
12%|█▏ | 1185/10000 [00:01<00:10, 845.13it/s]
13%|█▎ | 1270/10000 [00:01<00:10, 843.99it/s]
14%|█▎ | 1355/10000 [00:01<00:10, 844.19it/s]
14%|█▍ | 1440/10000 [00:01<00:10, 841.98it/s]
15%|█▌ | 1525/10000 [00:01<00:10, 840.72it/s]
16%|█▌ | 1610/10000 [00:01<00:09, 843.24it/s]
17%|█▋ | 1695/10000 [00:02<00:09, 841.99it/s]
18%|█▊ | 1780/10000 [00:02<00:09, 841.59it/s]
19%|█▊ | 1865/10000 [00:02<00:09, 840.66it/s]
20%|█▉ | 1950/10000 [00:02<00:09, 836.61it/s]
20%|██ | 2034/10000 [00:02<00:09, 834.71it/s]
21%|██ | 2119/10000 [00:02<00:09, 836.51it/s]
22%|██▏ | 2205/10000 [00:02<00:09, 841.45it/s]
23%|██▎ | 2290/10000 [00:02<00:09, 842.37it/s]
24%|██▍ | 2375/10000 [00:02<00:09, 841.77it/s]
25%|██▍ | 2460/10000 [00:02<00:08, 838.42it/s]
25%|██▌ | 2544/10000 [00:03<00:08, 838.26it/s]
26%|██▋ | 2629/10000 [00:03<00:08, 840.55it/s]
27%|██▋ | 2714/10000 [00:03<00:08, 839.51it/s]
28%|██▊ | 2798/10000 [00:03<00:08, 836.32it/s]
29%|██▉ | 2883/10000 [00:03<00:08, 838.01it/s]
30%|██▉ | 2967/10000 [00:03<00:08, 836.54it/s]
31%|███ | 3053/10000 [00:03<00:08, 841.45it/s]
31%|███▏ | 3138/10000 [00:03<00:08, 841.27it/s]
32%|███▏ | 3224/10000 [00:03<00:08, 844.07it/s]
33%|███▎ | 3310/10000 [00:03<00:07, 848.16it/s]
34%|███▍ | 3396/10000 [00:04<00:07, 849.78it/s]
35%|███▍ | 3482/10000 [00:04<00:07, 851.84it/s]
36%|███▌ | 3568/10000 [00:04<00:07, 846.71it/s]
37%|███▋ | 3653/10000 [00:04<00:07, 845.13it/s]
37%|███▋ | 3738/10000 [00:04<00:07, 843.62it/s]
38%|███▊ | 3825/10000 [00:04<00:07, 848.58it/s]
39%|███▉ | 3912/10000 [00:04<00:07, 852.13it/s]
40%|███▉ | 3998/10000 [00:04<00:07, 846.16it/s]
41%|████ | 4083/10000 [00:04<00:06, 847.25it/s]
42%|████▏ | 4169/10000 [00:04<00:06, 849.09it/s]
43%|████▎ | 4255/10000 [00:05<00:06, 851.19it/s]
43%|████▎ | 4342/10000 [00:05<00:06, 854.73it/s]
44%|████▍ | 4428/10000 [00:05<00:06, 852.70it/s]
45%|████▌ | 4514/10000 [00:05<00:06, 847.85it/s]
46%|████▌ | 4601/10000 [00:05<00:06, 851.76it/s]
47%|████▋ | 4687/10000 [00:05<00:06, 853.23it/s]
48%|████▊ | 4774/10000 [00:05<00:06, 856.12it/s]
49%|████▊ | 4860/10000 [00:05<00:06, 854.61it/s]
49%|████▉ | 4946/10000 [00:05<00:05, 853.26it/s]
50%|█████ | 5032/10000 [00:05<00:05, 852.38it/s]
51%|█████ | 5118/10000 [00:06<00:05, 850.65it/s]
52%|█████▏ | 5205/10000 [00:06<00:05, 854.87it/s]
53%|█████▎ | 5291/10000 [00:06<00:05, 849.02it/s]
54%|█████▍ | 5377/10000 [00:06<00:05, 852.04it/s]
55%|█████▍ | 5463/10000 [00:06<00:05, 847.77it/s]
55%|█████▌ | 5549/10000 [00:06<00:05, 849.50it/s]
56%|█████▋ | 5635/10000 [00:06<00:05, 850.65it/s]
57%|█████▋ | 5721/10000 [00:06<00:05, 846.81it/s]
58%|█████▊ | 5806/10000 [00:06<00:04, 843.82it/s]
59%|█████▉ | 5891/10000 [00:06<00:04, 843.01it/s]
60%|█████▉ | 5976/10000 [00:07<00:05, 795.38it/s]
61%|██████ | 6061/10000 [00:07<00:04, 810.36it/s]
61%|██████▏ | 6146/10000 [00:07<00:04, 821.25it/s]
62%|██████▏ | 6232/10000 [00:07<00:04, 829.99it/s]
63%|██████▎ | 6318/10000 [00:07<00:04, 838.30it/s]
64%|██████▍ | 6405/10000 [00:07<00:04, 847.52it/s]
65%|██████▍ | 6492/10000 [00:07<00:04, 851.14it/s]
66%|██████▌ | 6578/10000 [00:07<00:04, 852.08it/s]
67%|██████▋ | 6665/10000 [00:07<00:03, 856.88it/s]
68%|██████▊ | 6753/10000 [00:08<00:03, 863.37it/s]
68%|██████▊ | 6840/10000 [00:08<00:03, 861.42it/s]
69%|██████▉ | 6927/10000 [00:08<00:03, 858.13it/s]
70%|███████ | 7013/10000 [00:08<00:03, 851.80it/s]
71%|███████ | 7101/10000 [00:08<00:03, 858.00it/s]
72%|███████▏ | 7190/10000 [00:08<00:03, 865.50it/s]
73%|███████▎ | 7278/10000 [00:08<00:03, 867.99it/s]
74%|███████▎ | 7365/10000 [00:08<00:03, 845.01it/s]
74%|███████▍ | 7450/10000 [00:08<00:03, 845.44it/s]
75%|███████▌ | 7535/10000 [00:08<00:02, 833.75it/s]
76%|███████▌ | 7619/10000 [00:09<00:02, 822.06it/s]
77%|███████▋ | 7703/10000 [00:09<00:02, 827.22it/s]
78%|███████▊ | 7786/10000 [00:09<00:02, 823.85it/s]
79%|███████▊ | 7869/10000 [00:09<00:02, 824.08it/s]
80%|███████▉ | 7955/10000 [00:09<00:02, 832.34it/s]
80%|████████ | 8043/10000 [00:09<00:02, 844.06it/s]
81%|████████▏ | 8129/10000 [00:09<00:02, 848.14it/s]
82%|████████▏ | 8217/10000 [00:09<00:02, 857.15it/s]
83%|████████▎ | 8305/10000 [00:09<00:01, 861.45it/s]
84%|████████▍ | 8392/10000 [00:09<00:01, 858.08it/s]
85%|████████▍ | 8478/10000 [00:10<00:01, 849.27it/s]
86%|████████▌ | 8563/10000 [00:10<00:01, 842.38it/s]
86%|████████▋ | 8648/10000 [00:10<00:01, 838.75it/s]
87%|████████▋ | 8732/10000 [00:10<00:01, 835.87it/s]
88%|████████▊ | 8816/10000 [00:10<00:01, 829.52it/s]
89%|████████▉ | 8900/10000 [00:10<00:01, 831.78it/s]
90%|████████▉ | 8984/10000 [00:10<00:01, 831.91it/s]
91%|█████████ | 9071/10000 [00:10<00:01, 842.65it/s]
92%|█████████▏| 9156/10000 [00:10<00:01, 826.85it/s]
92%|█████████▏| 9239/10000 [00:10<00:00, 795.65it/s]
93%|█████████▎| 9320/10000 [00:11<00:00, 798.09it/s]
94%|█████████▍| 9401/10000 [00:11<00:00, 800.73it/s]
95%|█████████▍| 9482/10000 [00:11<00:00, 793.82it/s]
96%|█████████▌| 9565/10000 [00:11<00:00, 801.65it/s]
96%|█████████▋| 9647/10000 [00:11<00:00, 806.04it/s]
97%|█████████▋| 9730/10000 [00:11<00:00, 812.43it/s]
98%|█████████▊| 9812/10000 [00:11<00:00, 813.68it/s]
99%|█████████▉| 9898/10000 [00:11<00:00, 826.76it/s]
100%|█████████▉| 9981/10000 [00:11<00:00, 813.77it/s]
100%|██████████| 10000/10000 [00:12<00:00, 790.22it/s]
Define new variables
Defining new variables...: 0%| | 0/20 [00:00<?, ?it/s]
Defining new variables...: 5%|▌ | 1/20 [00:00<00:01, 9.65it/s]
Defining new variables...: 15%|█▌ | 3/20 [00:00<00:01, 13.30it/s]
Defining new variables...: 25%|██▌ | 5/20 [00:00<00:01, 14.53it/s]
Defining new variables...: 35%|███▌ | 7/20 [00:00<00:00, 15.08it/s]
Defining new variables...: 45%|████▌ | 9/20 [00:00<00:00, 15.33it/s]
Defining new variables...: 55%|█████▌ | 11/20 [00:00<00:00, 15.60it/s]
Defining new variables...: 65%|██████▌ | 13/20 [00:00<00:00, 15.79it/s]
Defining new variables...: 75%|███████▌ | 15/20 [00:00<00:00, 15.94it/s]
Defining new variables...: 85%|████████▌ | 17/20 [00:01<00:00, 15.99it/s]
Defining new variables...: 95%|█████████▌| 19/20 [00:01<00:00, 16.05it/s]
Defining new variables...: 100%|██████████| 20/20 [00:03<00:00, 5.72it/s]
File nested_downtown_20.dat has been created.
Definition of the nest.
# Parameter of the 'asian' nest.
# NOTE(review): the positional arguments presumably follow Biogeme's
# Beta(name, value, lowerbound, upperbound, status) signature, i.e. starting
# value 1.0, lower bound 1.0, no upper bound, estimated -- confirm.
mu_asian = Beta('mu_asian', 1.0, 1.0, None, 0)

# The only explicit nest: the alternatives listed in `asian`.
nest_asian = OneNestForNestedLogit(
    name='asian',
    nest_param=mu_asian,
    list_of_alternatives=asian,
)

# Nest structure over the full choice set. Alternatives that appear in no
# nest are reported by Biogeme as being each alone in a separate nest.
nests = NestsForNestedLogit(
    tuple_of_nests=(nest_asian,),
    choice_set=all_alternatives,
)
The following elements do not appear in any nest and are assumed each to be alone in a separate nest: {2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 19, 20, 21, 22, 23, 24, 25, 26, 28, 29, 30, 32, 35, 36, 38, 39, 41, 42, 43, 44, 46, 48, 49, 52, 53, 54, 56, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 69, 71, 73, 74, 75, 77, 82, 83, 84, 85, 86, 88, 90, 93, 95, 96, 97, 99}. If it is not the intention, check the assignment of alternatives to nests.
# Expression for the nested logit model, built by the model generator from
# the nest structure.
log_probability = the_model_generation.get_nested_logit(nests)
# Combine the sampled database and the model expression.
the_biogeme = BIOGEME(biogeme_database, log_probability)
# The model name is reused below to locate previously saved results and to
# label the estimation timer.
the_biogeme.model_name = MODEL_NAME
Biogeme parameters read from biogeme.toml.
Calculate the null log likelihood for reporting.
# Map every index 0 .. total_sample_size - 1 to 1.
# NOTE(review): presumably this marks each sampled alternative as available
# -- confirm against the Biogeme documentation.
the_biogeme.calculate_null_loglikelihood(
    dict.fromkeys(range(context.total_sample_size), 1)
)
-29957.32273553991
Estimate the parameters.
# Reuse previously saved estimation results when possible; otherwise estimate
# the model.
#
# A missing file raises FileNotFoundError. A file that exists but contains no
# usable data (e.g. an empty YAML document) makes `from_yaml_file` fail with
# "TypeError: 'NoneType' object is not subscriptable". Both cases mean there
# is nothing to reuse, so both fall back to a fresh estimation instead of
# aborting the script and leaving `results` undefined.
try:
    results = EstimationResults.from_yaml_file(
        filename=f'saved_results/{the_biogeme.model_name}.yaml'
    )
except (FileNotFoundError, TypeError):
    with timeit(f'Estimate of model {the_biogeme.model_name}'):
        results = the_biogeme.estimate()
Traceback (most recent call last):
File "/Users/bierlair/MyFiles/github/biogeme/docs/source/examples/sampling/plot_b02nested.py", line 116, in <module>
results = EstimationResults.from_yaml_file(
filename=f'saved_results/{the_biogeme.model_name}.yaml'
)
File "/Users/bierlair/MyFiles/github/biogeme/src/biogeme/results_processing/estimation_results.py", line 115, in from_yaml_file
restored_results: RawEstimationResults = deserialize_from_yaml(
~~~~~~~~~~~~~~~~~~~~~^
filename=filename
^^^^^^^^^^^^^^^^^
)
^
File "/Users/bierlair/MyFiles/github/biogeme/src/biogeme/results_processing/raw_estimation_results.py", line 106, in deserialize_from_yaml
if data['bootstrap_time'] is not None
~~~~^^^^^^^^^^^^^^^^^^
TypeError: 'NoneType' object is not subscriptable
# Short textual summary of the estimation results.
print(results.short_summary())
# Extract the estimated parameters and display them.
parameters_tables = get_pandas_estimated_parameters(estimation_results=results)
estimated_parameters = parameters_tables['Estimated parameters']
display(estimated_parameters)
# `compare` comes from the local `compare` module; it returns a table and a
# message, both printed below.
# NOTE(review): presumably it compares the estimates with reference values --
# confirm in compare.py.
df, msg = compare(estimated_parameters)
print(df)
print(msg)
Total running time of the script: (0 minutes 18.961 seconds)