Note
Go to the end to download the full example code.
Nested logit
Estimation of a nested logit model using sampling of alternatives.
- author:
Michel Bierlaire
- date:
Wed Nov 1 18:00:15 2023
import pandas as pd
from biogeme.sampling_of_alternatives import (
SamplingContext,
ChoiceSetsGeneration,
GenerateModel,
generate_segment_size,
)
from biogeme.expressions import Beta
from biogeme.nests import OneNestForNestedLogit, NestsForNestedLogit
import biogeme.biogeme_logging as blog
import biogeme.biogeme as bio
from specification_sampling import V, combined_variables
from compare import compare
from alternatives import (
alternatives,
ID_COLUMN,
partitions,
asian,
all_alternatives,
)
logger = blog.get_screen_logger(level=blog.INFO)
SAMPLE_SIZE = 20 # out of 100
SAMPLE_SIZE_MEV = 33 # out of 33
CHOICE_COLUMN = 'nested_0'
PARTITION = 'downtown'
MEV_PARTITION = 'uniform_asian'
MODEL_NAME = f'nested_{PARTITION}_{SAMPLE_SIZE}'
FILE_NAME = f'{MODEL_NAME}.dat'
the_partition = partitions.get(PARTITION)
if the_partition is None:
raise ValueError(f'Unknown partition: {PARTITION}')
segment_sizes = generate_segment_size(SAMPLE_SIZE, the_partition.number_of_segments())
We use all alternatives in the nest.
mev_partition = partitions.get(MEV_PARTITION)
if mev_partition is None:
raise ValueError(f'Unknown partition: {MEV_PARTITION}')
mev_segment_sizes = [SAMPLE_SIZE_MEV]
observations = pd.read_csv('obs_choice.dat')
context = SamplingContext(
the_partition=the_partition,
sample_sizes=segment_sizes,
individuals=observations,
choice_column=CHOICE_COLUMN,
alternatives=alternatives,
id_column=ID_COLUMN,
biogeme_file_name=FILE_NAME,
utility_function=V,
combined_variables=combined_variables,
mev_partition=mev_partition,
mev_sample_sizes=mev_segment_sizes,
)
logger.info(context.reporting())
Size of the choice set: 100
Main partition: 2 segment(s) of size 46, 54
Main sample: 20: 10/46, 10/54
Nbr of MEV alternatives: 33
MEV partition: 1 segment(s) of size 33
MEV sample: 33: 33/33
the_data_generation = ChoiceSetsGeneration(context=context)
the_model_generation = GenerateModel(context=context)
biogeme_database = the_data_generation.sample_and_merge(recycle=False)
Generating 20 + 33 alternatives for 10000 observations
0%| | 0/10000 [00:00<?, ?it/s]
1%| | 80/10000 [00:00<00:12, 794.30it/s]
2%|▏ | 162/10000 [00:00<00:12, 809.32it/s]
2%|▏ | 243/10000 [00:00<00:12, 802.39it/s]
3%|▎ | 324/10000 [00:00<00:12, 802.40it/s]
4%|▍ | 406/10000 [00:00<00:11, 807.01it/s]
5%|▍ | 487/10000 [00:00<00:11, 802.54it/s]
6%|▌ | 568/10000 [00:00<00:11, 797.74it/s]
6%|▋ | 648/10000 [00:00<00:11, 794.93it/s]
7%|▋ | 729/10000 [00:00<00:11, 798.59it/s]
8%|▊ | 809/10000 [00:01<00:11, 794.76it/s]
9%|▉ | 889/10000 [00:01<00:11, 795.77it/s]
10%|▉ | 970/10000 [00:01<00:11, 798.77it/s]
11%|█ | 1052/10000 [00:01<00:11, 804.48it/s]
11%|█▏ | 1133/10000 [00:01<00:11, 801.92it/s]
12%|█▏ | 1214/10000 [00:01<00:10, 800.67it/s]
13%|█▎ | 1295/10000 [00:01<00:10, 792.49it/s]
14%|█▍ | 1376/10000 [00:01<00:10, 794.83it/s]
15%|█▍ | 1456/10000 [00:01<00:10, 792.32it/s]
15%|█▌ | 1536/10000 [00:01<00:10, 790.00it/s]
16%|█▌ | 1616/10000 [00:02<00:10, 785.48it/s]
17%|█▋ | 1696/10000 [00:02<00:10, 788.59it/s]
18%|█▊ | 1775/10000 [00:02<00:10, 785.20it/s]
19%|█▊ | 1855/10000 [00:02<00:10, 787.53it/s]
19%|█▉ | 1935/10000 [00:02<00:10, 790.90it/s]
20%|██ | 2015/10000 [00:02<00:10, 785.24it/s]
21%|██ | 2094/10000 [00:02<00:10, 770.94it/s]
22%|██▏ | 2172/10000 [00:02<00:10, 766.16it/s]
22%|██▏ | 2249/10000 [00:02<00:10, 757.04it/s]
23%|██▎ | 2325/10000 [00:02<00:10, 750.69it/s]
24%|██▍ | 2402/10000 [00:03<00:10, 756.29it/s]
25%|██▍ | 2481/10000 [00:03<00:09, 765.47it/s]
26%|██▌ | 2560/10000 [00:03<00:09, 772.20it/s]
26%|██▋ | 2641/10000 [00:03<00:09, 783.29it/s]
27%|██▋ | 2720/10000 [00:03<00:09, 781.71it/s]
28%|██▊ | 2800/10000 [00:03<00:09, 785.64it/s]
29%|██▉ | 2879/10000 [00:03<00:09, 785.86it/s]
30%|██▉ | 2961/10000 [00:03<00:08, 794.53it/s]
30%|███ | 3042/10000 [00:03<00:08, 796.98it/s]
31%|███ | 3124/10000 [00:03<00:08, 801.18it/s]
32%|███▏ | 3205/10000 [00:04<00:08, 800.60it/s]
33%|███▎ | 3286/10000 [00:04<00:08, 796.99it/s]
34%|███▎ | 3366/10000 [00:04<00:08, 797.69it/s]
34%|███▍ | 3446/10000 [00:04<00:08, 792.94it/s]
35%|███▌ | 3526/10000 [00:04<00:08, 794.35it/s]
36%|███▌ | 3607/10000 [00:04<00:08, 798.98it/s]
37%|███▋ | 3687/10000 [00:04<00:07, 795.83it/s]
38%|███▊ | 3769/10000 [00:04<00:07, 800.45it/s]
38%|███▊ | 3850/10000 [00:04<00:07, 795.01it/s]
39%|███▉ | 3931/10000 [00:04<00:07, 798.00it/s]
40%|████ | 4011/10000 [00:05<00:07, 794.91it/s]
41%|████ | 4091/10000 [00:05<00:07, 794.11it/s]
42%|████▏ | 4171/10000 [00:05<00:07, 794.85it/s]
43%|████▎ | 4251/10000 [00:05<00:07, 794.33it/s]
43%|████▎ | 4332/10000 [00:05<00:07, 797.84it/s]
44%|████▍ | 4412/10000 [00:05<00:07, 796.59it/s]
45%|████▍ | 4492/10000 [00:05<00:06, 796.48it/s]
46%|████▌ | 4572/10000 [00:05<00:06, 792.33it/s]
47%|████▋ | 4652/10000 [00:05<00:06, 793.64it/s]
47%|████▋ | 4732/10000 [00:05<00:06, 793.78it/s]
48%|████▊ | 4813/10000 [00:06<00:06, 797.15it/s]
49%|████▉ | 4894/10000 [00:06<00:06, 800.06it/s]
50%|████▉ | 4975/10000 [00:06<00:06, 801.56it/s]
51%|█████ | 5056/10000 [00:06<00:06, 800.16it/s]
51%|█████▏ | 5137/10000 [00:06<00:06, 799.02it/s]
52%|█████▏ | 5217/10000 [00:06<00:06, 796.81it/s]
53%|█████▎ | 5297/10000 [00:06<00:05, 795.42it/s]
54%|█████▍ | 5377/10000 [00:06<00:05, 794.28it/s]
55%|█████▍ | 5458/10000 [00:06<00:05, 798.62it/s]
55%|█████▌ | 5538/10000 [00:06<00:05, 796.06it/s]
56%|█████▌ | 5618/10000 [00:07<00:05, 795.52it/s]
57%|█████▋ | 5698/10000 [00:07<00:05, 794.26it/s]
58%|█████▊ | 5778/10000 [00:07<00:05, 788.48it/s]
59%|█████▊ | 5857/10000 [00:07<00:05, 786.74it/s]
59%|█████▉ | 5937/10000 [00:07<00:05, 789.87it/s]
60%|██████ | 6016/10000 [00:07<00:05, 788.95it/s]
61%|██████ | 6096/10000 [00:07<00:04, 791.09it/s]
62%|██████▏ | 6176/10000 [00:07<00:04, 788.79it/s]
63%|██████▎ | 6256/10000 [00:07<00:04, 790.96it/s]
63%|██████▎ | 6336/10000 [00:08<00:04, 792.56it/s]
64%|██████▍ | 6416/10000 [00:08<00:04, 792.67it/s]
65%|██████▍ | 6497/10000 [00:08<00:04, 796.61it/s]
66%|██████▌ | 6577/10000 [00:08<00:04, 792.19it/s]
67%|██████▋ | 6657/10000 [00:08<00:04, 789.39it/s]
67%|██████▋ | 6738/10000 [00:08<00:04, 794.25it/s]
68%|██████▊ | 6818/10000 [00:08<00:04, 790.89it/s]
69%|██████▉ | 6898/10000 [00:08<00:03, 792.43it/s]
70%|██████▉ | 6978/10000 [00:08<00:03, 794.19it/s]
71%|███████ | 7058/10000 [00:08<00:03, 793.50it/s]
71%|███████▏ | 7139/10000 [00:09<00:03, 795.46it/s]
72%|███████▏ | 7219/10000 [00:09<00:03, 796.49it/s]
73%|███████▎ | 7299/10000 [00:09<00:03, 796.52it/s]
74%|███████▍ | 7380/10000 [00:09<00:03, 797.97it/s]
75%|███████▍ | 7460/10000 [00:09<00:03, 796.74it/s]
75%|███████▌ | 7541/10000 [00:09<00:03, 800.13it/s]
76%|███████▌ | 7622/10000 [00:09<00:02, 801.85it/s]
77%|███████▋ | 7703/10000 [00:09<00:02, 796.69it/s]
78%|███████▊ | 7784/10000 [00:09<00:02, 798.53it/s]
79%|███████▊ | 7864/10000 [00:09<00:02, 795.47it/s]
79%|███████▉ | 7944/10000 [00:10<00:02, 792.58it/s]
80%|████████ | 8024/10000 [00:10<00:02, 793.29it/s]
81%|████████ | 8104/10000 [00:10<00:02, 794.29it/s]
82%|████████▏ | 8184/10000 [00:10<00:02, 795.60it/s]
83%|████████▎ | 8266/10000 [00:10<00:02, 801.06it/s]
83%|████████▎ | 8347/10000 [00:10<00:02, 797.27it/s]
84%|████████▍ | 8427/10000 [00:10<00:01, 794.30it/s]
85%|████████▌ | 8507/10000 [00:10<00:01, 790.25it/s]
86%|████████▌ | 8587/10000 [00:10<00:01, 790.93it/s]
87%|████████▋ | 8667/10000 [00:10<00:01, 791.47it/s]
87%|████████▋ | 8747/10000 [00:11<00:01, 792.64it/s]
88%|████████▊ | 8827/10000 [00:11<00:01, 793.65it/s]
89%|████████▉ | 8908/10000 [00:11<00:01, 798.37it/s]
90%|████████▉ | 8989/10000 [00:11<00:01, 800.95it/s]
91%|█████████ | 9070/10000 [00:11<00:01, 800.38it/s]
92%|█████████▏| 9152/10000 [00:11<00:01, 804.64it/s]
92%|█████████▏| 9233/10000 [00:11<00:00, 801.85it/s]
93%|█████████▎| 9314/10000 [00:11<00:00, 803.86it/s]
94%|█████████▍| 9395/10000 [00:11<00:00, 802.79it/s]
95%|█████████▍| 9476/10000 [00:11<00:00, 804.63it/s]
96%|█████████▌| 9557/10000 [00:12<00:00, 802.73it/s]
96%|█████████▋| 9638/10000 [00:12<00:00, 800.25it/s]
97%|█████████▋| 9719/10000 [00:12<00:00, 799.25it/s]
98%|█████████▊| 9801/10000 [00:12<00:00, 804.38it/s]
99%|█████████▉| 9882/10000 [00:12<00:00, 803.90it/s]
100%|█████████▉| 9963/10000 [00:12<00:00, 804.60it/s]
100%|██████████| 10000/10000 [00:13<00:00, 738.33it/s]
Define new variables
Defining new variables...: 0%| | 0/20 [00:00<?, ?it/s]
Defining new variables...: 5%|▌ | 1/20 [00:00<00:05, 3.37it/s]
Defining new variables...: 10%|█ | 2/20 [00:00<00:05, 3.19it/s]
Defining new variables...: 15%|█▌ | 3/20 [00:00<00:05, 3.13it/s]
Defining new variables...: 20%|██ | 4/20 [00:01<00:05, 3.11it/s]
Defining new variables...: 25%|██▌ | 5/20 [00:01<00:04, 3.06it/s]
Defining new variables...: 30%|███ | 6/20 [00:01<00:04, 3.06it/s]
Defining new variables...: 35%|███▌ | 7/20 [00:02<00:04, 3.06it/s]
Defining new variables...: 40%|████ | 8/20 [00:02<00:03, 3.05it/s]
Defining new variables...: 45%|████▌ | 9/20 [00:02<00:03, 3.04it/s]
Defining new variables...: 50%|█████ | 10/20 [00:03<00:03, 3.03it/s]
Defining new variables...: 55%|█████▌ | 11/20 [00:03<00:02, 3.04it/s]
Defining new variables...: 60%|██████ | 12/20 [00:03<00:02, 3.04it/s]
Defining new variables...: 65%|██████▌ | 13/20 [00:04<00:02, 3.04it/s]
Defining new variables...: 70%|███████ | 14/20 [00:04<00:01, 3.04it/s]
Defining new variables...: 75%|███████▌ | 15/20 [00:04<00:01, 3.04it/s]
Defining new variables...: 80%|████████ | 16/20 [00:05<00:01, 3.04it/s]
Defining new variables...: 85%|████████▌ | 17/20 [00:05<00:00, 3.04it/s]
Defining new variables...: 90%|█████████ | 18/20 [00:05<00:00, 3.04it/s]
Defining new variables...: 95%|█████████▌| 19/20 [00:06<00:00, 3.04it/s]
Defining new variables...: 100%|██████████| 20/20 [00:06<00:00, 3.02it/s]
Defining new variables...: 100%|██████████| 20/20 [00:17<00:00, 1.13it/s]
File nested_downtown_20.dat has been created.
Definition of the nest.
mu_asian = Beta('mu_asian', 1.0, 1.0, None, 0)
nest_asian = OneNestForNestedLogit(
nest_param=mu_asian, list_of_alternatives=asian, name='asian'
)
nests = NestsForNestedLogit(
choice_set=all_alternatives,
tuple_of_nests=(nest_asian,),
)
The following elements do not appear in any nest and are assumed each to be alone in a separate nest: {2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 19, 20, 21, 22, 23, 24, 25, 26, 28, 29, 30, 32, 35, 36, 38, 39, 41, 42, 43, 44, 46, 48, 49, 52, 53, 54, 56, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 69, 71, 73, 74, 75, 77, 82, 83, 84, 85, 86, 88, 90, 93, 95, 96, 97, 99}. If it is not the intention, check the assignment of alternatives to nests.
logprob = the_model_generation.get_nested_logit(nests)
the_biogeme = bio.BIOGEME(biogeme_database, logprob)
the_biogeme.modelName = MODEL_NAME
Biogeme parameters read from biogeme.toml.
Calculate the null log likelihood for reporting.
the_biogeme.calculate_null_loglikelihood(
{i: 1 for i in range(context.total_sample_size)}
)
np.float64(-29957.32273553647)
Estimate the parameters
results = the_biogeme.estimate(recycle=False)
As the model is not too complex, we activate the calculation of second derivatives. If you want to change it, change the name of the algorithm in the TOML file from "automatic" to "simple_bounds"
*** Initial values of the parameters are obtained from the file __nested_downtown_20.iter
Cannot read file __nested_downtown_20.iter. Statement is ignored.
As the model is not too complex, we activate the calculation of second derivatives. If you want to change it, change the name of the algorithm in the TOML file from "automatic" to "simple_bounds"
Optimization algorithm: hybrid Newton/BFGS with simple bounds [simple_bounds]
** Optimization: Newton with trust region for simple bounds
Iter. beta_chinese beta_ethiopian beta_french beta_indian beta_japanese beta_korean beta_lebanese beta_log_dist beta_mexican beta_price beta_rating mu_asian Function Relgrad Radius Rho
0 -0.094 -0.044 0.023 -0.12 0.12 -0.067 0.0027 -1 0.68 -0.51 0.47 1.2 2.4e+04 0.081 10 0.92 ++
1 0.18 0.52 0.83 0.39 1 0.24 0.78 -0.61 1.2 -0.4 0.76 1.4 2.3e+04 0.024 1e+02 1 ++
2 0.52 0.51 0.76 0.83 1.1 0.56 0.72 -0.62 1.2 -0.42 0.79 1.7 2.3e+04 0.012 1e+03 1.1 ++
3 0.63 0.5 0.75 0.91 1.2 0.64 0.72 -0.6 1.2 -0.41 0.77 1.9 2.3e+04 0.0032 1e+04 1.2 ++
4 0.69 0.5 0.75 0.97 1.2 0.69 0.71 -0.59 1.2 -0.41 0.76 2 2.3e+04 0.00042 1e+05 1.1 ++
5 0.69 0.5 0.75 0.97 1.2 0.69 0.71 -0.59 1.2 -0.41 0.76 2 2.3e+04 6.3e-06 1e+05 1 ++
Results saved in file nested_downtown_20.html
Results saved in file nested_downtown_20.pickle
print(results.short_summary())
Results for model nested_downtown_20
Nbr of parameters: 12
Sample size: 10000
Excluded data: 0
Null log likelihood: -29957.32
Final log likelihood: -22979.19
Likelihood ratio test (null): 13956.26
Rho square (null): 0.233
Rho bar square (null): 0.233
Akaike Information Criterion: 45982.39
Bayesian Information Criterion: 46068.91
estimated_parameters = results.get_estimated_parameters()
estimated_parameters
df, msg = compare(estimated_parameters)
print(df)
Name True Value Estimated Value T-Test
0 beta_rating 0.75 0.763591 -0.890885
1 beta_price -0.40 -0.404956 0.403743
2 beta_chinese 0.75 0.700229 0.698055
3 beta_japanese 1.25 1.236949 0.240375
4 beta_korean 0.75 0.696263 0.872402
5 beta_indian 1.00 0.976977 0.361951
6 beta_french 0.75 0.744952 0.103343
7 beta_mexican 1.25 1.204193 1.571277
8 beta_lebanese 0.75 0.713084 0.739766
9 beta_ethiopian 0.50 0.503377 -0.083940
10 beta_log_dist -0.60 -0.588648 -0.891786
11 mu_asian 2.00 2.019067 -0.322789
print(msg)
Parameters not estimated: ['mu_downtown']
Total running time of the script: (1 minutes 16.858 seconds)