Note
Go to the end to download the full example code
Nested logit
Estimation of a nested logit model using sampling of alternatives.
- author: Michel Bierlaire
- date: Wed Nov 1 18:00:15 2023
import pandas as pd
from biogeme.sampling_of_alternatives import (
SamplingContext,
ChoiceSetsGeneration,
GenerateModel,
generate_segment_size,
)
from biogeme.expressions import Beta
from biogeme.nests import OneNestForNestedLogit, NestsForNestedLogit
import biogeme.biogeme_logging as blog
import biogeme.biogeme as bio
from specification import V, combined_variables
from compare import compare
from alternatives import (
alternatives,
ID_COLUMN,
partitions,
asian,
all_alternatives,
)
# Screen logger created at the INFO level; used below to report the sampling context.
logger = blog.get_screen_logger(level=blog.INFO)
# --- Partitions, looked up by name in `partitions` ---
PARTITION = 'downtown'
MEV_PARTITION = 'uniform_asian'

# --- Sample sizes ---
SAMPLE_SIZE = 20  # out of 100
SAMPLE_SIZE_MEV = 33  # out of 33

# --- Column of the observations holding the choice ---
CHOICE_COLUMN = 'nested_0'

# --- Names derived from the settings above ---
MODEL_NAME = f'nested_{PARTITION}_{SAMPLE_SIZE}'
FILE_NAME = f'{MODEL_NAME}.dat'
# Look up the partition used to draw the main sample of alternatives.
the_partition = partitions.get(PARTITION)
if the_partition is None:
    # Fail early with an explicit message instead of an AttributeError below.
    raise ValueError(f'Unknown partition: {PARTITION}')

# Split the total sample size across the segments of the partition.
segment_sizes = generate_segment_size(
    SAMPLE_SIZE, the_partition.number_of_segments()
)
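For the MEV partition, we use all alternatives in the nest: the sample size (33) equals the number of alternatives in the nest (33), so no actual sampling takes place for these terms.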
# Look up the partition used to sample the alternatives for the MEV terms.
mev_partition = partitions.get(MEV_PARTITION)
if mev_partition is None:
    # Fail early with an explicit message rather than passing None on.
    raise ValueError(f'Unknown partition: {MEV_PARTITION}')

# A single sample size, equal to SAMPLE_SIZE_MEV.
mev_segment_sizes = [SAMPLE_SIZE_MEV]
# Load the observed choices; they are handed to the context as `individuals`.
observations = pd.read_csv('obs_choice.dat')

# Gather everything the sampling procedure needs in a single context object.
context = SamplingContext(
    # Data: individuals, the column with their choice, and the alternatives.
    individuals=observations,
    choice_column=CHOICE_COLUMN,
    alternatives=alternatives,
    id_column=ID_COLUMN,
    # Model specification.
    utility_function=V,
    combined_variables=combined_variables,
    # Sampling of the main choice set.
    the_partition=the_partition,
    sample_sizes=segment_sizes,
    # Sampling for the MEV terms.
    mev_partition=mev_partition,
    mev_sample_sizes=mev_segment_sizes,
    # File name passed on for the generated data.
    biogeme_file_name=FILE_NAME,
)
# Log a summary of the sampling context (choice set size, partitions, sample sizes).
logger.info(context.reporting())
Size of the choice set: 100
Main partition: 2 segment(s) of size 46, 54
Main sample: 20: 10/46, 10/54
Nbr of MEV alternatives: 33
MEV partition: 1 segment(s) of size 33
MEV sample: 33: 33/33
# Object in charge of sampling the choice sets, built from the context.
the_data_generation = ChoiceSetsGeneration(context=context)
# Object in charge of generating the model expressions, built from the same context.
the_model_generation = GenerateModel(context=context)
# Sample the alternatives and merge them with the observations.
# recycle=False: presumably forces a fresh sampling rather than reusing an
# existing data file -- confirm against the Biogeme documentation.
biogeme_database = the_data_generation.sample_and_merge(recycle=False)
Generating 20 + 33 alternatives for 10000 observations
0%| | 0/10000 [00:00<?, ?it/s]
1%| | 53/10000 [00:00<00:18, 528.30it/s]
1%| | 106/10000 [00:00<00:20, 482.09it/s]
2%|▏ | 155/10000 [00:00<00:21, 465.19it/s]
2%|▏ | 202/10000 [00:00<00:21, 459.47it/s]
3%|▎ | 255/10000 [00:00<00:20, 482.43it/s]
3%|▎ | 304/10000 [00:00<00:20, 470.56it/s]
4%|▎ | 352/10000 [00:00<00:20, 465.70it/s]
4%|▍ | 400/10000 [00:00<00:20, 469.47it/s]
4%|▍ | 448/10000 [00:00<00:20, 462.99it/s]
5%|▍ | 498/10000 [00:01<00:20, 472.95it/s]
5%|▌ | 546/10000 [00:01<00:20, 469.43it/s]
6%|▌ | 593/10000 [00:01<00:20, 460.61it/s]
6%|▋ | 640/10000 [00:01<00:20, 450.45it/s]
7%|▋ | 687/10000 [00:01<00:20, 454.01it/s]
7%|▋ | 733/10000 [00:01<00:20, 446.39it/s]
8%|▊ | 778/10000 [00:01<00:20, 446.00it/s]
8%|▊ | 823/10000 [00:01<00:20, 442.13it/s]
9%|▉ | 877/10000 [00:01<00:19, 469.00it/s]
9%|▉ | 931/10000 [00:01<00:18, 489.44it/s]
10%|▉ | 981/10000 [00:02<00:18, 476.83it/s]
10%|█ | 1029/10000 [00:02<00:19, 469.92it/s]
11%|█ | 1077/10000 [00:02<00:19, 468.88it/s]
11%|█▏ | 1128/10000 [00:02<00:18, 479.19it/s]
12%|█▏ | 1181/10000 [00:02<00:17, 491.93it/s]
12%|█▏ | 1231/10000 [00:02<00:18, 482.98it/s]
13%|█▎ | 1280/10000 [00:02<00:18, 477.30it/s]
13%|█▎ | 1328/10000 [00:02<00:18, 471.46it/s]
14%|█▍ | 1376/10000 [00:02<00:18, 458.76it/s]
14%|█▍ | 1427/10000 [00:03<00:18, 471.10it/s]
15%|█▍ | 1475/10000 [00:03<00:18, 465.43it/s]
15%|█▌ | 1522/10000 [00:03<00:18, 466.11it/s]
16%|█▌ | 1572/10000 [00:03<00:17, 473.65it/s]
16%|█▌ | 1620/10000 [00:03<00:17, 466.36it/s]
17%|█▋ | 1667/10000 [00:03<00:17, 465.36it/s]
17%|█▋ | 1714/10000 [00:03<00:17, 462.38it/s]
18%|█▊ | 1761/10000 [00:03<00:17, 463.50it/s]
18%|█▊ | 1808/10000 [00:03<00:17, 460.06it/s]
19%|█▊ | 1857/10000 [00:03<00:17, 466.44it/s]
19%|█▉ | 1904/10000 [00:04<00:17, 460.30it/s]
20%|█▉ | 1953/10000 [00:04<00:17, 467.38it/s]
20%|██ | 2000/10000 [00:04<00:17, 460.21it/s]
20%|██ | 2047/10000 [00:04<00:17, 454.87it/s]
21%|██ | 2095/10000 [00:04<00:17, 459.69it/s]
21%|██▏ | 2141/10000 [00:04<00:17, 454.54it/s]
22%|██▏ | 2187/10000 [00:04<00:17, 451.52it/s]
22%|██▏ | 2234/10000 [00:04<00:17, 456.36it/s]
23%|██▎ | 2284/10000 [00:04<00:16, 466.37it/s]
23%|██▎ | 2333/10000 [00:05<00:16, 470.62it/s]
24%|██▍ | 2381/10000 [00:05<00:16, 462.78it/s]
24%|██▍ | 2428/10000 [00:05<00:16, 464.25it/s]
25%|██▍ | 2475/10000 [00:05<00:16, 461.39it/s]
25%|██▌ | 2522/10000 [00:05<00:16, 456.40it/s]
26%|██▌ | 2578/10000 [00:05<00:15, 486.50it/s]
26%|██▋ | 2627/10000 [00:05<00:15, 472.71it/s]
27%|██▋ | 2678/10000 [00:05<00:15, 481.64it/s]
27%|██▋ | 2727/10000 [00:05<00:15, 478.81it/s]
28%|██▊ | 2775/10000 [00:05<00:15, 476.75it/s]
28%|██▊ | 2823/10000 [00:06<00:15, 474.50it/s]
29%|██▊ | 2871/10000 [00:06<00:15, 475.05it/s]
29%|██▉ | 2922/10000 [00:06<00:14, 482.90it/s]
30%|██▉ | 2971/10000 [00:06<00:15, 464.09it/s]
30%|███ | 3018/10000 [00:06<00:15, 459.32it/s]
31%|███ | 3066/10000 [00:06<00:14, 462.84it/s]
31%|███ | 3113/10000 [00:06<00:14, 461.45it/s]
32%|███▏ | 3160/10000 [00:06<00:15, 450.33it/s]
32%|███▏ | 3206/10000 [00:06<00:15, 445.08it/s]
33%|███▎ | 3261/10000 [00:06<00:14, 474.09it/s]
33%|███▎ | 3309/10000 [00:07<00:14, 459.27it/s]
34%|███▎ | 3356/10000 [00:07<00:14, 453.47it/s]
34%|███▍ | 3403/10000 [00:07<00:14, 455.25it/s]
35%|███▍ | 3451/10000 [00:07<00:14, 460.47it/s]
35%|███▌ | 3506/10000 [00:07<00:13, 484.09it/s]
36%|███▌ | 3555/10000 [00:07<00:13, 475.93it/s]
36%|███▌ | 3603/10000 [00:07<00:13, 464.00it/s]
36%|███▋ | 3650/10000 [00:07<00:13, 462.53it/s]
37%|███▋ | 3697/10000 [00:07<00:13, 452.49it/s]
37%|███▋ | 3746/10000 [00:08<00:13, 460.87it/s]
38%|███▊ | 3793/10000 [00:08<00:13, 447.81it/s]
38%|███▊ | 3839/10000 [00:08<00:13, 448.83it/s]
39%|███▉ | 3884/10000 [00:08<00:13, 446.58it/s]
39%|███▉ | 3930/10000 [00:08<00:13, 449.88it/s]
40%|███▉ | 3977/10000 [00:08<00:13, 452.85it/s]
40%|████ | 4024/10000 [00:08<00:13, 456.38it/s]
41%|████ | 4070/10000 [00:08<00:13, 448.91it/s]
41%|████ | 4115/10000 [00:08<00:13, 444.37it/s]
42%|████▏ | 4163/10000 [00:08<00:12, 454.39it/s]
42%|████▏ | 4211/10000 [00:09<00:12, 459.75it/s]
43%|████▎ | 4259/10000 [00:09<00:12, 463.27it/s]
43%|████▎ | 4306/10000 [00:09<00:12, 455.56it/s]
44%|████▎ | 4352/10000 [00:09<00:12, 455.80it/s]
44%|████▍ | 4405/10000 [00:09<00:11, 474.01it/s]
45%|████▍ | 4453/10000 [00:09<00:11, 464.74it/s]
45%|████▌ | 4505/10000 [00:09<00:11, 477.57it/s]
46%|████▌ | 4555/10000 [00:09<00:11, 483.22it/s]
46%|████▌ | 4604/10000 [00:09<00:11, 475.96it/s]
47%|████▋ | 4653/10000 [00:10<00:11, 478.82it/s]
47%|████▋ | 4703/10000 [00:10<00:10, 482.51it/s]
48%|████▊ | 4752/10000 [00:10<00:10, 478.17it/s]
48%|████▊ | 4800/10000 [00:10<00:10, 472.92it/s]
48%|████▊ | 4848/10000 [00:10<00:11, 466.70it/s]
49%|████▉ | 4901/10000 [00:10<00:10, 485.07it/s]
50%|████▉ | 4950/10000 [00:10<00:10, 475.98it/s]
50%|████▉ | 4998/10000 [00:10<00:10, 473.73it/s]
50%|█████ | 5046/10000 [00:10<00:10, 464.47it/s]
51%|█████ | 5093/10000 [00:10<00:10, 464.97it/s]
51%|█████▏ | 5140/10000 [00:11<00:10, 462.59it/s]
52%|█████▏ | 5194/10000 [00:11<00:09, 484.01it/s]
52%|█████▎ | 5250/10000 [00:11<00:09, 503.34it/s]
53%|█████▎ | 5301/10000 [00:11<00:09, 483.79it/s]
54%|█████▎ | 5354/10000 [00:11<00:09, 497.05it/s]
54%|█████▍ | 5405/10000 [00:11<00:09, 498.14it/s]
55%|█████▍ | 5455/10000 [00:11<00:09, 482.31it/s]
55%|█████▌ | 5504/10000 [00:11<00:09, 474.38it/s]
56%|█████▌ | 5552/10000 [00:11<00:09, 461.47it/s]
56%|█████▌ | 5599/10000 [00:11<00:09, 463.30it/s]
56%|█████▋ | 5649/10000 [00:12<00:09, 472.50it/s]
57%|█████▋ | 5697/10000 [00:12<00:09, 465.08it/s]
57%|█████▋ | 5744/10000 [00:12<00:09, 455.08it/s]
58%|█████▊ | 5790/10000 [00:12<00:09, 449.23it/s]
58%|█████▊ | 5841/10000 [00:12<00:08, 464.63it/s]
59%|█████▉ | 5888/10000 [00:12<00:08, 462.92it/s]
59%|█████▉ | 5935/10000 [00:12<00:08, 454.78it/s]
60%|█████▉ | 5994/10000 [00:12<00:08, 491.96it/s]
60%|██████ | 6044/10000 [00:12<00:08, 482.84it/s]
61%|██████ | 6099/10000 [00:13<00:07, 502.25it/s]
62%|██████▏ | 6156/10000 [00:13<00:07, 520.53it/s]
62%|██████▏ | 6209/10000 [00:13<00:07, 497.36it/s]
63%|██████▎ | 6260/10000 [00:13<00:07, 475.23it/s]
63%|██████▎ | 6310/10000 [00:13<00:07, 482.10it/s]
64%|██████▎ | 6360/10000 [00:13<00:07, 486.76it/s]
64%|██████▍ | 6409/10000 [00:13<00:07, 475.80it/s]
65%|██████▍ | 6457/10000 [00:13<00:07, 474.47it/s]
65%|██████▌ | 6505/10000 [00:13<00:07, 463.43it/s]
66%|██████▌ | 6555/10000 [00:13<00:07, 473.48it/s]
66%|██████▌ | 6603/10000 [00:14<00:07, 465.43it/s]
66%|██████▋ | 6650/10000 [00:14<00:07, 459.39it/s]
67%|██████▋ | 6698/10000 [00:14<00:07, 463.37it/s]
67%|██████▋ | 6745/10000 [00:14<00:07, 457.74it/s]
68%|██████▊ | 6799/10000 [00:14<00:06, 480.44it/s]
69%|██████▊ | 6851/10000 [00:14<00:06, 491.65it/s]
69%|██████▉ | 6901/10000 [00:14<00:06, 480.19it/s]
70%|██████▉ | 6950/10000 [00:14<00:06, 471.45it/s]
70%|██████▉ | 6998/10000 [00:14<00:06, 463.31it/s]
70%|███████ | 7049/10000 [00:15<00:06, 474.51it/s]
71%|███████ | 7097/10000 [00:15<00:06, 472.31it/s]
71%|███████▏ | 7147/10000 [00:15<00:05, 478.92it/s]
72%|███████▏ | 7199/10000 [00:15<00:05, 488.52it/s]
73%|███████▎ | 7254/10000 [00:15<00:05, 504.90it/s]
73%|███████▎ | 7305/10000 [00:15<00:05, 502.93it/s]
74%|███████▎ | 7356/10000 [00:15<00:05, 503.46it/s]
74%|███████▍ | 7409/10000 [00:15<00:05, 508.49it/s]
75%|███████▍ | 7461/10000 [00:15<00:04, 511.22it/s]
75%|███████▌ | 7514/10000 [00:15<00:04, 514.88it/s]
76%|███████▌ | 7571/10000 [00:16<00:04, 529.92it/s]
76%|███████▋ | 7625/10000 [00:16<00:04, 515.53it/s]
77%|███████▋ | 7686/10000 [00:16<00:04, 542.93it/s]
77%|███████▋ | 7741/10000 [00:16<00:04, 536.67it/s]
78%|███████▊ | 7795/10000 [00:16<00:04, 529.51it/s]
78%|███████▊ | 7849/10000 [00:16<00:04, 526.54it/s]
79%|███████▉ | 7902/10000 [00:16<00:04, 520.87it/s]
80%|███████▉ | 7955/10000 [00:16<00:03, 519.76it/s]
80%|████████ | 8008/10000 [00:16<00:03, 517.12it/s]
81%|████████ | 8066/10000 [00:16<00:03, 533.96it/s]
81%|████████ | 8120/10000 [00:17<00:03, 519.64it/s]
82%|████████▏ | 8173/10000 [00:17<00:03, 520.92it/s]
82%|████████▏ | 8226/10000 [00:17<00:03, 518.88it/s]
83%|████████▎ | 8279/10000 [00:17<00:03, 520.46it/s]
83%|████████▎ | 8345/10000 [00:17<00:02, 559.41it/s]
84%|████████▍ | 8402/10000 [00:17<00:02, 534.81it/s]
85%|████████▍ | 8456/10000 [00:17<00:02, 522.35it/s]
85%|████████▌ | 8509/10000 [00:17<00:02, 516.04it/s]
86%|████████▌ | 8566/10000 [00:17<00:02, 529.13it/s]
86%|████████▌ | 8621/10000 [00:18<00:02, 533.07it/s]
87%|████████▋ | 8676/10000 [00:18<00:02, 537.58it/s]
87%|████████▋ | 8730/10000 [00:18<00:02, 528.96it/s]
88%|████████▊ | 8784/10000 [00:18<00:02, 531.04it/s]
88%|████████▊ | 8839/10000 [00:18<00:02, 534.11it/s]
89%|████████▉ | 8893/10000 [00:18<00:02, 526.02it/s]
89%|████████▉ | 8946/10000 [00:18<00:02, 526.47it/s]
90%|█████████ | 9001/10000 [00:18<00:01, 533.02it/s]
91%|█████████ | 9066/10000 [00:18<00:01, 564.75it/s]
91%|█████████ | 9123/10000 [00:18<00:01, 549.63it/s]
92%|█████████▏| 9182/10000 [00:19<00:01, 558.39it/s]
92%|█████████▏| 9238/10000 [00:19<00:01, 548.76it/s]
93%|█████████▎| 9293/10000 [00:19<00:01, 547.26it/s]
93%|█████████▎| 9348/10000 [00:19<00:01, 539.30it/s]
94%|█████████▍| 9408/10000 [00:19<00:01, 555.43it/s]
95%|█████████▍| 9464/10000 [00:19<00:00, 539.55it/s]
95%|█████████▌| 9519/10000 [00:19<00:00, 536.88it/s]
96%|█████████▌| 9578/10000 [00:19<00:00, 549.69it/s]
96%|█████████▋| 9634/10000 [00:19<00:00, 527.24it/s]
97%|█████████▋| 9687/10000 [00:20<00:00, 520.22it/s]
97%|█████████▋| 9740/10000 [00:20<00:00, 520.90it/s]
98%|█████████▊| 9795/10000 [00:20<00:00, 528.96it/s]
98%|█████████▊| 9849/10000 [00:20<00:00, 529.44it/s]
99%|█████████▉| 9903/10000 [00:20<00:00, 522.50it/s]
100%|█████████▉| 9962/10000 [00:20<00:00, 541.42it/s]
100%|██████████| 10000/10000 [00:22<00:00, 453.28it/s]
Define new variables
Defining new variables...: 0%| | 0/20 [00:00<?, ?it/s]
Defining new variables...: 5%|▌ | 1/20 [00:00<00:08, 2.33it/s]
Defining new variables...: 10%|█ | 2/20 [00:00<00:08, 2.19it/s]
Defining new variables...: 15%|█▌ | 3/20 [00:01<00:07, 2.15it/s]
Defining new variables...: 20%|██ | 4/20 [00:01<00:07, 2.15it/s]
Defining new variables...: 25%|██▌ | 5/20 [00:02<00:07, 2.13it/s]
Defining new variables...: 30%|███ | 6/20 [00:02<00:06, 2.15it/s]
Defining new variables...: 35%|███▌ | 7/20 [00:03<00:06, 2.16it/s]
Defining new variables...: 40%|████ | 8/20 [00:03<00:05, 2.14it/s]
Defining new variables...: 45%|████▌ | 9/20 [00:04<00:05, 2.12it/s]
Defining new variables...: 50%|█████ | 10/20 [00:04<00:04, 2.10it/s]
Defining new variables...: 55%|█████▌ | 11/20 [00:05<00:04, 2.11it/s]
Defining new variables...: 60%|██████ | 12/20 [00:05<00:03, 2.10it/s]
Defining new variables...: 65%|██████▌ | 13/20 [00:06<00:03, 2.12it/s]
Defining new variables...: 70%|███████ | 14/20 [00:06<00:02, 2.13it/s]
Defining new variables...: 75%|███████▌ | 15/20 [00:07<00:02, 2.13it/s]
Defining new variables...: 80%|████████ | 16/20 [00:07<00:01, 2.13it/s]
Defining new variables...: 85%|████████▌ | 17/20 [00:07<00:01, 2.12it/s]
Defining new variables...: 90%|█████████ | 18/20 [00:08<00:00, 2.12it/s]
Defining new variables...: 95%|█████████▌| 19/20 [00:08<00:00, 2.09it/s]
Defining new variables...: 100%|██████████| 20/20 [00:09<00:00, 2.09it/s]
Defining new variables...: 100%|██████████| 20/20 [00:25<00:00, 1.29s/it]
File nested_downtown_20.dat has been created.
Definition of the nest.
# Nest parameter. Per the Biogeme `Beta` signature (name, value, lower bound,
# upper bound, status) this starts at 1.0, is bounded below by 1.0, has no
# upper bound and is estimated -- confirm against the installed version.
mu_asian = Beta('mu_asian', 1.0, 1.0, None, 0)

# The only nest of the model: it contains the alternatives listed in `asian`.
nest_asian = OneNestForNestedLogit(
    name='asian',
    nest_param=mu_asian,
    list_of_alternatives=asian,
)

# Full nesting structure over the complete choice set.
nests = NestsForNestedLogit(
    tuple_of_nests=(nest_asian,),
    choice_set=all_alternatives,
)

# Expression of the nested logit model for the sampled alternatives.
logprob = the_model_generation.get_nested_logit(nests)

# Biogeme object combining the sampled database and the model expression.
the_biogeme = bio.BIOGEME(biogeme_database, logprob)
the_biogeme.modelName = MODEL_NAME
File biogeme.toml has been parsed.
Calculate the null log likelihood for reporting.
# Map every index in range(context.total_sample_size) to 1 (presumably the
# availability of each sampled alternative -- confirm) and compute the null
# log likelihood from it.
the_biogeme.calculateNullLoglikelihood(
    dict.fromkeys(range(context.total_sample_size), 1)
)
-29957.32273553647
Estimate the parameters
# Run the estimation. recycle=False presumably requests a new estimation
# rather than reloading previously saved results -- confirm against Biogeme docs.
results = the_biogeme.estimate(recycle=False)
*** Initial values of the parameters are obtained from the file __nested_downtown_20.iter
Parameter values restored from __nested_downtown_20.iter
Optimization algorithm: hybrid Newton/BFGS with simple bounds [simple_bounds]
** Optimization: Newton with trust region for simple bounds
Iter. beta_chinese beta_ethiopian beta_french beta_indian beta_japanese beta_korean beta_lebanese beta_log_dist beta_mexican beta_price beta_rating mu_asian Function Relgrad Radius Rho
0 -0.096 -0.04 0.018 -0.12 0.12 -0.067 0.0058 -1 0.67 -0.51 0.47 1.2 2.4e+04 0.076 10 0.93 ++
1 0.12 0.51 0.78 0.34 0.96 0.2 0.79 -0.63 1.2 -0.4 0.76 1.4 2.3e+04 0.023 1e+02 1 ++
2 0.45 0.5 0.72 0.78 1.1 0.51 0.73 -0.64 1.2 -0.42 0.79 1.7 2.3e+04 0.012 1e+03 1.1 ++
3 0.57 0.5 0.71 0.86 1.2 0.59 0.73 -0.61 1.2 -0.41 0.78 1.9 2.3e+04 0.0031 1e+04 1.2 ++
4 0.63 0.5 0.71 0.92 1.2 0.65 0.73 -0.6 1.2 -0.4 0.77 2 2.3e+04 0.00039 1e+05 1.1 ++
5 0.63 0.5 0.71 0.92 1.2 0.65 0.73 -0.6 1.2 -0.4 0.77 2 2.3e+04 5.5e-06 1e+05 1 ++
Results saved in file nested_downtown_20~00.html
Results saved in file nested_downtown_20~00.pickle
# Print a short summary of the estimation results (fit statistics).
print(results.short_summary())
Results for model nested_downtown_20
Nbr of parameters: 12
Sample size: 10000
Excluded data: 0
Null log likelihood: -29957.32
Final log likelihood: -22979.05
Likelihood ratio test (null): 13956.55
Rho square (null): 0.233
Rho bar square (null): 0.233
Akaike Information Criterion: 45982.09
Bayesian Information Criterion: 46068.62
# Retrieve the estimated parameters from the estimation results.
estimated_parameters = results.getEstimatedParameters()
# Bare expression: presumably kept so the rendered example displays the table.
estimated_parameters
# Compare the estimates with the true values; `compare` returns a table and a message.
df, msg = compare(estimated_parameters)
print(df)
Name True Value Estimated Value T-Test
0 beta_rating 0.75 0.768725 -1.220940
1 beta_price -0.40 -0.403833 0.315295
2 beta_chinese 0.75 0.636120 1.580784
3 beta_japanese 1.25 1.197669 0.961672
4 beta_korean 0.75 0.651077 1.596322
5 beta_indian 1.00 0.926366 1.146833
6 beta_french 0.75 0.710729 0.799967
7 beta_mexican 1.25 1.199156 1.746107
8 beta_lebanese 0.75 0.726548 0.471487
9 beta_ethiopian 0.50 0.499967 0.000816
10 beta_log_dist -0.60 -0.603541 0.272169
11 mu_asian 2.00 1.978174 0.379049
# Print the message returned by compare(); in this run it lists the parameters not estimated.
print(msg)
Parameters not estimated: ['mu_downtown']
Total running time of the script: (2 minutes 13.658 seconds)