Nested logit

Estimation of a nested logit model using sampling of alternatives.

Michel Bierlaire Sat Jul 26 2025, 13:01:22

import pandas as pd
from alternatives import ID_COLUMN, all_alternatives, alternatives, asian, partitions
from compare import compare
from IPython.core.display_functions import display
from specification_sampling import V, combined_variables

import biogeme.biogeme_logging as blog
from biogeme.biogeme import BIOGEME
from biogeme.expressions import Beta
from biogeme.nests import NestsForNestedLogit, OneNestForNestedLogit
from biogeme.results_processing import (
    EstimationResults,
    get_pandas_estimated_parameters,
)
from biogeme.sampling_of_alternatives import (
    ChoiceSetsGeneration,
    GenerateModel,
    SamplingContext,
    generate_segment_size,
)
from biogeme.tools import timeit
    ID  rating  price  ...   rest_lon    distance  downtown
0    0       1      4  ...  42.220972   71.735518       1.0
1    1       2      2  ...  50.549434  106.267205       0.0
2    2       3      3  ...  97.830520  136.298409       0.0
3    3       4      1  ...  69.152206   85.941147       0.0
4    4       4      3  ...  89.145620   96.773021       0.0
..  ..     ...    ...  ...        ...         ...       ...
95  95       4      3  ...   9.511387   84.166441       0.0
96  96       1      1  ...  92.144641   95.601366       0.0
97  97       4      2  ...  27.657518   30.440555       1.0
98  98       4      4  ...  32.303213   45.027143       1.0
99  99       4      1  ...  13.672495   25.703295       1.0

[100 rows x 16 columns]
Number of asian restaurants: 33
# Screen logger used to report the sampling configuration.
logger = blog.get_screen_logger(level=blog.INFO)

# Partitions used to draw the main sample and the sample for the MEV term.
PARTITION = 'downtown'
MEV_PARTITION = 'uniform_asian'

# Number of alternatives drawn for each of the two samples.
SAMPLE_SIZE = 20  # out of 100
SAMPLE_SIZE_MEV = 33  # out of 33

# Column of the observation file that contains the chosen alternative.
CHOICE_COLUMN = 'nested_0'

# The model name, and the data file derived from it, encode the sampling setup.
MODEL_NAME = f'nested_{PARTITION}_{SAMPLE_SIZE}'
FILE_NAME = f'{MODEL_NAME}.dat'

# Fail early if the requested partition has not been defined.
the_partition = partitions.get(PARTITION)
if the_partition is None:
    raise ValueError(f'Unknown partition: {PARTITION}')

# Split the main sample size across the segments of the partition.
segment_sizes = generate_segment_size(
    SAMPLE_SIZE,
    the_partition.number_of_segments(),
)

We use all alternatives in the nest: the sample for the MEV term contains all 33 Asian restaurants.

# Partition used to draw the sample for the MEV term; fail early if unknown.
mev_partition = partitions.get(MEV_PARTITION)
if mev_partition is None:
    raise ValueError(f'Unknown partition: {MEV_PARTITION}')

# One sample size per segment of the MEV partition (a single segment here).
mev_segment_sizes = [SAMPLE_SIZE_MEV]

# Observed choices of the individuals.
observations = pd.read_csv('obs_choice.dat')

# Everything the sampling procedure needs, gathered in one place.
context_settings = {
    'the_partition': the_partition,
    'sample_sizes': segment_sizes,
    'individuals': observations,
    'choice_column': CHOICE_COLUMN,
    'alternatives': alternatives,
    'id_column': ID_COLUMN,
    'biogeme_file_name': FILE_NAME,
    'utility_function': V,
    'combined_variables': combined_variables,
    'mev_partition': mev_partition,
    'mev_sample_sizes': mev_segment_sizes,
}
context = SamplingContext(**context_settings)

# Log a summary of the sampling configuration.
sampling_report = context.reporting()
logger.info(sampling_report)
Size of the choice set: 100
Main partition: 2 segment(s) of size 46, 54
Main sample: 20: 10/46, 10/54
Nbr of MEV alternatives: 33
MEV partition: 1 segment(s) of size 33
MEV sample: 33: 33/33
# Two helpers share the same sampling context: the first one produces the
# data, the second one is used below to build the model expression.
the_data_generation = ChoiceSetsGeneration(context=context)
the_model_generation = GenerateModel(context=context)
# Generate the sampled choice sets; the result is the database handed to
# BIOGEME for the estimation.
# NOTE(review): recycle=False presumably forces a fresh draw instead of reusing
# a previously generated data file -- confirm against the biogeme documentation.
biogeme_database = the_data_generation.sample_and_merge(recycle=False)
Generating 20 + 33 alternatives for 10000 observations

  0%|          | 0/10000 [00:00<?, ?it/s]
  1%|          | 81/10000 [00:00<00:12, 801.20it/s]
  2%|▏         | 166/10000 [00:00<00:11, 825.88it/s]
  2%|▏         | 249/10000 [00:00<00:11, 818.44it/s]
  3%|▎         | 332/10000 [00:00<00:11, 821.13it/s]
  4%|▍         | 418/10000 [00:00<00:11, 831.48it/s]
  5%|▌         | 503/10000 [00:00<00:11, 835.37it/s]
  6%|▌         | 589/10000 [00:00<00:11, 841.08it/s]
  7%|▋         | 674/10000 [00:00<00:11, 841.65it/s]
  8%|▊         | 759/10000 [00:00<00:10, 843.14it/s]
  8%|▊         | 844/10000 [00:01<00:10, 844.35it/s]
  9%|▉         | 929/10000 [00:01<00:10, 843.22it/s]
 10%|█         | 1014/10000 [00:01<00:10, 841.61it/s]
 11%|█         | 1099/10000 [00:01<00:10, 842.72it/s]
 12%|█▏        | 1185/10000 [00:01<00:10, 845.13it/s]
 13%|█▎        | 1270/10000 [00:01<00:10, 843.99it/s]
 14%|█▎        | 1355/10000 [00:01<00:10, 844.19it/s]
 14%|█▍        | 1440/10000 [00:01<00:10, 841.98it/s]
 15%|█▌        | 1525/10000 [00:01<00:10, 840.72it/s]
 16%|█▌        | 1610/10000 [00:01<00:09, 843.24it/s]
 17%|█▋        | 1695/10000 [00:02<00:09, 841.99it/s]
 18%|█▊        | 1780/10000 [00:02<00:09, 841.59it/s]
 19%|█▊        | 1865/10000 [00:02<00:09, 840.66it/s]
 20%|█▉        | 1950/10000 [00:02<00:09, 836.61it/s]
 20%|██        | 2034/10000 [00:02<00:09, 834.71it/s]
 21%|██        | 2119/10000 [00:02<00:09, 836.51it/s]
 22%|██▏       | 2205/10000 [00:02<00:09, 841.45it/s]
 23%|██▎       | 2290/10000 [00:02<00:09, 842.37it/s]
 24%|██▍       | 2375/10000 [00:02<00:09, 841.77it/s]
 25%|██▍       | 2460/10000 [00:02<00:08, 838.42it/s]
 25%|██▌       | 2544/10000 [00:03<00:08, 838.26it/s]
 26%|██▋       | 2629/10000 [00:03<00:08, 840.55it/s]
 27%|██▋       | 2714/10000 [00:03<00:08, 839.51it/s]
 28%|██▊       | 2798/10000 [00:03<00:08, 836.32it/s]
 29%|██▉       | 2883/10000 [00:03<00:08, 838.01it/s]
 30%|██▉       | 2967/10000 [00:03<00:08, 836.54it/s]
 31%|███       | 3053/10000 [00:03<00:08, 841.45it/s]
 31%|███▏      | 3138/10000 [00:03<00:08, 841.27it/s]
 32%|███▏      | 3224/10000 [00:03<00:08, 844.07it/s]
 33%|███▎      | 3310/10000 [00:03<00:07, 848.16it/s]
 34%|███▍      | 3396/10000 [00:04<00:07, 849.78it/s]
 35%|███▍      | 3482/10000 [00:04<00:07, 851.84it/s]
 36%|███▌      | 3568/10000 [00:04<00:07, 846.71it/s]
 37%|███▋      | 3653/10000 [00:04<00:07, 845.13it/s]
 37%|███▋      | 3738/10000 [00:04<00:07, 843.62it/s]
 38%|███▊      | 3825/10000 [00:04<00:07, 848.58it/s]
 39%|███▉      | 3912/10000 [00:04<00:07, 852.13it/s]
 40%|███▉      | 3998/10000 [00:04<00:07, 846.16it/s]
 41%|████      | 4083/10000 [00:04<00:06, 847.25it/s]
 42%|████▏     | 4169/10000 [00:04<00:06, 849.09it/s]
 43%|████▎     | 4255/10000 [00:05<00:06, 851.19it/s]
 43%|████▎     | 4342/10000 [00:05<00:06, 854.73it/s]
 44%|████▍     | 4428/10000 [00:05<00:06, 852.70it/s]
 45%|████▌     | 4514/10000 [00:05<00:06, 847.85it/s]
 46%|████▌     | 4601/10000 [00:05<00:06, 851.76it/s]
 47%|████▋     | 4687/10000 [00:05<00:06, 853.23it/s]
 48%|████▊     | 4774/10000 [00:05<00:06, 856.12it/s]
 49%|████▊     | 4860/10000 [00:05<00:06, 854.61it/s]
 49%|████▉     | 4946/10000 [00:05<00:05, 853.26it/s]
 50%|█████     | 5032/10000 [00:05<00:05, 852.38it/s]
 51%|█████     | 5118/10000 [00:06<00:05, 850.65it/s]
 52%|█████▏    | 5205/10000 [00:06<00:05, 854.87it/s]
 53%|█████▎    | 5291/10000 [00:06<00:05, 849.02it/s]
 54%|█████▍    | 5377/10000 [00:06<00:05, 852.04it/s]
 55%|█████▍    | 5463/10000 [00:06<00:05, 847.77it/s]
 55%|█████▌    | 5549/10000 [00:06<00:05, 849.50it/s]
 56%|█████▋    | 5635/10000 [00:06<00:05, 850.65it/s]
 57%|█████▋    | 5721/10000 [00:06<00:05, 846.81it/s]
 58%|█████▊    | 5806/10000 [00:06<00:04, 843.82it/s]
 59%|█████▉    | 5891/10000 [00:06<00:04, 843.01it/s]
 60%|█████▉    | 5976/10000 [00:07<00:05, 795.38it/s]
 61%|██████    | 6061/10000 [00:07<00:04, 810.36it/s]
 61%|██████▏   | 6146/10000 [00:07<00:04, 821.25it/s]
 62%|██████▏   | 6232/10000 [00:07<00:04, 829.99it/s]
 63%|██████▎   | 6318/10000 [00:07<00:04, 838.30it/s]
 64%|██████▍   | 6405/10000 [00:07<00:04, 847.52it/s]
 65%|██████▍   | 6492/10000 [00:07<00:04, 851.14it/s]
 66%|██████▌   | 6578/10000 [00:07<00:04, 852.08it/s]
 67%|██████▋   | 6665/10000 [00:07<00:03, 856.88it/s]
 68%|██████▊   | 6753/10000 [00:08<00:03, 863.37it/s]
 68%|██████▊   | 6840/10000 [00:08<00:03, 861.42it/s]
 69%|██████▉   | 6927/10000 [00:08<00:03, 858.13it/s]
 70%|███████   | 7013/10000 [00:08<00:03, 851.80it/s]
 71%|███████   | 7101/10000 [00:08<00:03, 858.00it/s]
 72%|███████▏  | 7190/10000 [00:08<00:03, 865.50it/s]
 73%|███████▎  | 7278/10000 [00:08<00:03, 867.99it/s]
 74%|███████▎  | 7365/10000 [00:08<00:03, 845.01it/s]
 74%|███████▍  | 7450/10000 [00:08<00:03, 845.44it/s]
 75%|███████▌  | 7535/10000 [00:08<00:02, 833.75it/s]
 76%|███████▌  | 7619/10000 [00:09<00:02, 822.06it/s]
 77%|███████▋  | 7703/10000 [00:09<00:02, 827.22it/s]
 78%|███████▊  | 7786/10000 [00:09<00:02, 823.85it/s]
 79%|███████▊  | 7869/10000 [00:09<00:02, 824.08it/s]
 80%|███████▉  | 7955/10000 [00:09<00:02, 832.34it/s]
 80%|████████  | 8043/10000 [00:09<00:02, 844.06it/s]
 81%|████████▏ | 8129/10000 [00:09<00:02, 848.14it/s]
 82%|████████▏ | 8217/10000 [00:09<00:02, 857.15it/s]
 83%|████████▎ | 8305/10000 [00:09<00:01, 861.45it/s]
 84%|████████▍ | 8392/10000 [00:09<00:01, 858.08it/s]
 85%|████████▍ | 8478/10000 [00:10<00:01, 849.27it/s]
 86%|████████▌ | 8563/10000 [00:10<00:01, 842.38it/s]
 86%|████████▋ | 8648/10000 [00:10<00:01, 838.75it/s]
 87%|████████▋ | 8732/10000 [00:10<00:01, 835.87it/s]
 88%|████████▊ | 8816/10000 [00:10<00:01, 829.52it/s]
 89%|████████▉ | 8900/10000 [00:10<00:01, 831.78it/s]
 90%|████████▉ | 8984/10000 [00:10<00:01, 831.91it/s]
 91%|█████████ | 9071/10000 [00:10<00:01, 842.65it/s]
 92%|█████████▏| 9156/10000 [00:10<00:01, 826.85it/s]
 92%|█████████▏| 9239/10000 [00:10<00:00, 795.65it/s]
 93%|█████████▎| 9320/10000 [00:11<00:00, 798.09it/s]
 94%|█████████▍| 9401/10000 [00:11<00:00, 800.73it/s]
 95%|█████████▍| 9482/10000 [00:11<00:00, 793.82it/s]
 96%|█████████▌| 9565/10000 [00:11<00:00, 801.65it/s]
 96%|█████████▋| 9647/10000 [00:11<00:00, 806.04it/s]
 97%|█████████▋| 9730/10000 [00:11<00:00, 812.43it/s]
 98%|█████████▊| 9812/10000 [00:11<00:00, 813.68it/s]
 99%|█████████▉| 9898/10000 [00:11<00:00, 826.76it/s]
100%|█████████▉| 9981/10000 [00:11<00:00, 813.77it/s]
100%|██████████| 10000/10000 [00:12<00:00, 790.22it/s]
Define new variables

Defining new variables...:   0%|          | 0/20 [00:00<?, ?it/s]
Defining new variables...:   5%|▌         | 1/20 [00:00<00:01,  9.65it/s]
Defining new variables...:  15%|█▌        | 3/20 [00:00<00:01, 13.30it/s]
Defining new variables...:  25%|██▌       | 5/20 [00:00<00:01, 14.53it/s]
Defining new variables...:  35%|███▌      | 7/20 [00:00<00:00, 15.08it/s]
Defining new variables...:  45%|████▌     | 9/20 [00:00<00:00, 15.33it/s]
Defining new variables...:  55%|█████▌    | 11/20 [00:00<00:00, 15.60it/s]
Defining new variables...:  65%|██████▌   | 13/20 [00:00<00:00, 15.79it/s]
Defining new variables...:  75%|███████▌  | 15/20 [00:00<00:00, 15.94it/s]
Defining new variables...:  85%|████████▌ | 17/20 [00:01<00:00, 15.99it/s]
Defining new variables...:  95%|█████████▌| 19/20 [00:01<00:00, 16.05it/s]
Defining new variables...: 100%|██████████| 20/20 [00:03<00:00,  5.72it/s]
File nested_downtown_20.dat has been created.

Definition of the nest.

# Nest parameter associated with the 'asian' nest.
# NOTE(review): the positional arguments are presumably (name, initial value,
# lower bound, upper bound, status), i.e. starting at 1.0, bounded below by
# 1.0, unbounded above and estimated -- confirm against the Beta documentation.
mu_asian = Beta('mu_asian', 1.0, 1.0, None, 0)
# The only explicit nest: it groups the alternatives listed in `asian`.
nest_asian = OneNestForNestedLogit(
    nest_param=mu_asian, list_of_alternatives=asian, name='asian'
)
# Nesting structure over the full choice set. Only one nest is declared, hence
# the one-element tuple; biogeme reports that the alternatives that appear in
# no nest are each assumed to be alone in a separate nest.
nests = NestsForNestedLogit(
    choice_set=all_alternatives,
    tuple_of_nests=(nest_asian,),
)
The following elements do not appear in any nest and are assumed each to be alone in a separate nest: {2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 19, 20, 21, 22, 23, 24, 25, 26, 28, 29, 30, 32, 35, 36, 38, 39, 41, 42, 43, 44, 46, 48, 49, 52, 53, 54, 56, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 69, 71, 73, 74, 75, 77, 82, 83, 84, 85, 86, 88, 90, 93, 95, 96, 97, 99}. If it is not the intention, check the assignment of alternatives to nests.
# Model expression of the nested logit for the nests defined above, built by
# the model generator from the sampling context.
log_probability = the_model_generation.get_nested_logit(nests)
# Estimation object combining the sampled database and the model expression.
the_biogeme = BIOGEME(biogeme_database, log_probability)
# The model name is reused later to locate the file with the saved results.
the_biogeme.model_name = MODEL_NAME
Biogeme parameters read from biogeme.toml.

Calculate the null log likelihood for reporting.

# Map every index 0, ..., total_sample_size - 1 to 1.
# NOTE(review): presumably the availability of each sampled alternative --
# confirm against the documentation of calculate_null_loglikelihood.
the_biogeme.calculate_null_loglikelihood(
    dict.fromkeys(range(context.total_sample_size), 1)
)
-29957.32273553991

Estimate the parameters.

# Reuse previously saved estimation results when they can be loaded;
# otherwise estimate the model from scratch.
try:
    results = EstimationResults.from_yaml_file(
        filename=f'saved_results/{the_biogeme.model_name}.yaml'
    )
except (FileNotFoundError, TypeError) as load_error:
    # FileNotFoundError: no results have been saved for this model.
    # TypeError: the file exists but holds no usable content, in which case
    # from_yaml_file fails with "'NoneType' object is not subscriptable".
    # Both cases mean there is nothing to reuse, so the model is estimated.
    print(
        f'Saved results could not be loaded ({load_error!r}); '
        f'estimating model {the_biogeme.model_name}.'
    )
    with timeit(f'Estimate of model {the_biogeme.model_name}'):
        results = the_biogeme.estimate()
Traceback (most recent call last):
  File "/Users/bierlair/MyFiles/github/biogeme/docs/source/examples/sampling/plot_b02nested.py", line 116, in <module>
    results = EstimationResults.from_yaml_file(
        filename=f'saved_results/{the_biogeme.model_name}.yaml'
    )
  File "/Users/bierlair/MyFiles/github/biogeme/src/biogeme/results_processing/estimation_results.py", line 115, in from_yaml_file
    restored_results: RawEstimationResults = deserialize_from_yaml(
                                             ~~~~~~~~~~~~~~~~~~~~~^
        filename=filename
        ^^^^^^^^^^^^^^^^^
    )
    ^
  File "/Users/bierlair/MyFiles/github/biogeme/src/biogeme/results_processing/raw_estimation_results.py", line 106, in deserialize_from_yaml
    if data['bootstrap_time'] is not None
       ~~~~^^^^^^^^^^^^^^^^^^
TypeError: 'NoneType' object is not subscriptable
# Short textual summary of the estimation.
estimation_summary = results.short_summary()
print(estimation_summary)

# Table of the estimated parameters, displayed in the notebook/gallery.
parameters_tables = get_pandas_estimated_parameters(estimation_results=results)
estimated_parameters = parameters_tables['Estimated parameters']
display(estimated_parameters)

# Run the comparison helper on the estimates and print both of its outputs,
# one after the other.
df, msg = compare(estimated_parameters)
print(df, msg, sep='\n')

Total running time of the script: (0 minutes 18.961 seconds)

Gallery generated by Sphinx-Gallery