Nested logit

Estimation of a nested logit model using sampling of alternatives.

author:: Michel Bierlaire
date:: Wed Nov 1 18:00:15 2023

import pandas as pd
from biogeme.sampling_of_alternatives import (
    SamplingContext,
    ChoiceSetsGeneration,
    GenerateModel,
    generate_segment_size,
)
from biogeme.expressions import Beta
from biogeme.nests import OneNestForNestedLogit, NestsForNestedLogit
import biogeme.biogeme_logging as blog
import biogeme.biogeme as bio
from specification_sampling import V, combined_variables
from compare import compare
from alternatives import (
    alternatives,
    ID_COLUMN,
    partitions,
    asian,
    all_alternatives,
)

logger = blog.get_screen_logger(level=blog.INFO)

SAMPLE_SIZE = 20  # out of 100
SAMPLE_SIZE_MEV = 33  # out of 33
CHOICE_COLUMN = 'nested_0'
PARTITION = 'downtown'
MEV_PARTITION = 'uniform_asian'
MODEL_NAME = f'nested_{PARTITION}_{SAMPLE_SIZE}'
FILE_NAME = f'{MODEL_NAME}.dat'

the_partition = partitions.get(PARTITION)
if the_partition is None:
    raise ValueError(f'Unknown partition: {PARTITION}')

segment_sizes = generate_segment_size(SAMPLE_SIZE, the_partition.number_of_segments())

We use all alternatives in the nest.

mev_partition = partitions.get(MEV_PARTITION)
if mev_partition is None:
    raise ValueError(f'Unknown partition: {MEV_PARTITION}')
mev_segment_sizes = [SAMPLE_SIZE_MEV]

observations = pd.read_csv('obs_choice.dat')

context = SamplingContext(
    the_partition=the_partition,
    sample_sizes=segment_sizes,
    individuals=observations,
    choice_column=CHOICE_COLUMN,
    alternatives=alternatives,
    id_column=ID_COLUMN,
    biogeme_file_name=FILE_NAME,
    utility_function=V,
    combined_variables=combined_variables,
    mev_partition=mev_partition,
    mev_sample_sizes=mev_segment_sizes,
)

logger.info(context.reporting())

Size of the choice set: 100
Main partition: 2 segment(s) of size 46, 54
Main sample: 20: 10/46, 10/54
Nbr of MEV alternatives: 33
MEV partition: 1 segment(s) of size 33
MEV sample: 33: 33/33

the_data_generation = ChoiceSetsGeneration(context=context)
the_model_generation = GenerateModel(context=context)

biogeme_database = the_data_generation.sample_and_merge(recycle=False)

Generating 20 + 33 alternatives for 10000 observations

  0%|          | 0/10000 [00:00<?, ?it/s]
  1%|          | 80/10000 [00:00<00:12, 794.30it/s]
  2%|▏         | 162/10000 [00:00<00:12, 809.32it/s]
  2%|▏         | 243/10000 [00:00<00:12, 802.39it/s]
  3%|▎         | 324/10000 [00:00<00:12, 802.40it/s]
  4%|▍         | 406/10000 [00:00<00:11, 807.01it/s]
  5%|▍         | 487/10000 [00:00<00:11, 802.54it/s]
  6%|▌         | 568/10000 [00:00<00:11, 797.74it/s]
  6%|▋         | 648/10000 [00:00<00:11, 794.93it/s]
  7%|▋         | 729/10000 [00:00<00:11, 798.59it/s]
  8%|▊         | 809/10000 [00:01<00:11, 794.76it/s]
  9%|▉         | 889/10000 [00:01<00:11, 795.77it/s]
 10%|▉         | 970/10000 [00:01<00:11, 798.77it/s]
 11%|█         | 1052/10000 [00:01<00:11, 804.48it/s]
 11%|█▏        | 1133/10000 [00:01<00:11, 801.92it/s]
 12%|█▏        | 1214/10000 [00:01<00:10, 800.67it/s]
 13%|█▎        | 1295/10000 [00:01<00:10, 792.49it/s]
 14%|█▍        | 1376/10000 [00:01<00:10, 794.83it/s]
 15%|█▍        | 1456/10000 [00:01<00:10, 792.32it/s]
 15%|█▌        | 1536/10000 [00:01<00:10, 790.00it/s]
 16%|█▌        | 1616/10000 [00:02<00:10, 785.48it/s]
 17%|█▋        | 1696/10000 [00:02<00:10, 788.59it/s]
 18%|█▊        | 1775/10000 [00:02<00:10, 785.20it/s]
 19%|█▊        | 1855/10000 [00:02<00:10, 787.53it/s]
 19%|█▉        | 1935/10000 [00:02<00:10, 790.90it/s]
 20%|██        | 2015/10000 [00:02<00:10, 785.24it/s]
 21%|██        | 2094/10000 [00:02<00:10, 770.94it/s]
 22%|██▏       | 2172/10000 [00:02<00:10, 766.16it/s]
 22%|██▏       | 2249/10000 [00:02<00:10, 757.04it/s]
 23%|██▎       | 2325/10000 [00:02<00:10, 750.69it/s]
 24%|██▍       | 2402/10000 [00:03<00:10, 756.29it/s]
 25%|██▍       | 2481/10000 [00:03<00:09, 765.47it/s]
 26%|██▌       | 2560/10000 [00:03<00:09, 772.20it/s]
 26%|██▋       | 2641/10000 [00:03<00:09, 783.29it/s]
 27%|██▋       | 2720/10000 [00:03<00:09, 781.71it/s]
 28%|██▊       | 2800/10000 [00:03<00:09, 785.64it/s]
 29%|██▉       | 2879/10000 [00:03<00:09, 785.86it/s]
 30%|██▉       | 2961/10000 [00:03<00:08, 794.53it/s]
 30%|███       | 3042/10000 [00:03<00:08, 796.98it/s]
 31%|███       | 3124/10000 [00:03<00:08, 801.18it/s]
 32%|███▏      | 3205/10000 [00:04<00:08, 800.60it/s]
 33%|███▎      | 3286/10000 [00:04<00:08, 796.99it/s]
 34%|███▎      | 3366/10000 [00:04<00:08, 797.69it/s]
 34%|███▍      | 3446/10000 [00:04<00:08, 792.94it/s]
 35%|███▌      | 3526/10000 [00:04<00:08, 794.35it/s]
 36%|███▌      | 3607/10000 [00:04<00:08, 798.98it/s]
 37%|███▋      | 3687/10000 [00:04<00:07, 795.83it/s]
 38%|███▊      | 3769/10000 [00:04<00:07, 800.45it/s]
 38%|███▊      | 3850/10000 [00:04<00:07, 795.01it/s]
 39%|███▉      | 3931/10000 [00:04<00:07, 798.00it/s]
 40%|████      | 4011/10000 [00:05<00:07, 794.91it/s]
 41%|████      | 4091/10000 [00:05<00:07, 794.11it/s]
 42%|████▏     | 4171/10000 [00:05<00:07, 794.85it/s]
 43%|████▎     | 4251/10000 [00:05<00:07, 794.33it/s]
 43%|████▎     | 4332/10000 [00:05<00:07, 797.84it/s]
 44%|████▍     | 4412/10000 [00:05<00:07, 796.59it/s]
 45%|████▍     | 4492/10000 [00:05<00:06, 796.48it/s]
 46%|████▌     | 4572/10000 [00:05<00:06, 792.33it/s]
 47%|████▋     | 4652/10000 [00:05<00:06, 793.64it/s]
 47%|████▋     | 4732/10000 [00:05<00:06, 793.78it/s]
 48%|████▊     | 4813/10000 [00:06<00:06, 797.15it/s]
 49%|████▉     | 4894/10000 [00:06<00:06, 800.06it/s]
 50%|████▉     | 4975/10000 [00:06<00:06, 801.56it/s]
 51%|█████     | 5056/10000 [00:06<00:06, 800.16it/s]
 51%|█████▏    | 5137/10000 [00:06<00:06, 799.02it/s]
 52%|█████▏    | 5217/10000 [00:06<00:06, 796.81it/s]
 53%|█████▎    | 5297/10000 [00:06<00:05, 795.42it/s]
 54%|█████▍    | 5377/10000 [00:06<00:05, 794.28it/s]
 55%|█████▍    | 5458/10000 [00:06<00:05, 798.62it/s]
 55%|█████▌    | 5538/10000 [00:06<00:05, 796.06it/s]
 56%|█████▌    | 5618/10000 [00:07<00:05, 795.52it/s]
 57%|█████▋    | 5698/10000 [00:07<00:05, 794.26it/s]
 58%|█████▊    | 5778/10000 [00:07<00:05, 788.48it/s]
 59%|█████▊    | 5857/10000 [00:07<00:05, 786.74it/s]
 59%|█████▉    | 5937/10000 [00:07<00:05, 789.87it/s]
 60%|██████    | 6016/10000 [00:07<00:05, 788.95it/s]
 61%|██████    | 6096/10000 [00:07<00:04, 791.09it/s]
 62%|██████▏   | 6176/10000 [00:07<00:04, 788.79it/s]
 63%|██████▎   | 6256/10000 [00:07<00:04, 790.96it/s]
 63%|██████▎   | 6336/10000 [00:08<00:04, 792.56it/s]
 64%|██████▍   | 6416/10000 [00:08<00:04, 792.67it/s]
 65%|██████▍   | 6497/10000 [00:08<00:04, 796.61it/s]
 66%|██████▌   | 6577/10000 [00:08<00:04, 792.19it/s]
 67%|██████▋   | 6657/10000 [00:08<00:04, 789.39it/s]
 67%|██████▋   | 6738/10000 [00:08<00:04, 794.25it/s]
 68%|██████▊   | 6818/10000 [00:08<00:04, 790.89it/s]
 69%|██████▉   | 6898/10000 [00:08<00:03, 792.43it/s]
 70%|██████▉   | 6978/10000 [00:08<00:03, 794.19it/s]
 71%|███████   | 7058/10000 [00:08<00:03, 793.50it/s]
 71%|███████▏  | 7139/10000 [00:09<00:03, 795.46it/s]
 72%|███████▏  | 7219/10000 [00:09<00:03, 796.49it/s]
 73%|███████▎  | 7299/10000 [00:09<00:03, 796.52it/s]
 74%|███████▍  | 7380/10000 [00:09<00:03, 797.97it/s]
 75%|███████▍  | 7460/10000 [00:09<00:03, 796.74it/s]
 75%|███████▌  | 7541/10000 [00:09<00:03, 800.13it/s]
 76%|███████▌  | 7622/10000 [00:09<00:02, 801.85it/s]
 77%|███████▋  | 7703/10000 [00:09<00:02, 796.69it/s]
 78%|███████▊  | 7784/10000 [00:09<00:02, 798.53it/s]
 79%|███████▊  | 7864/10000 [00:09<00:02, 795.47it/s]
 79%|███████▉  | 7944/10000 [00:10<00:02, 792.58it/s]
 80%|████████  | 8024/10000 [00:10<00:02, 793.29it/s]
 81%|████████  | 8104/10000 [00:10<00:02, 794.29it/s]
 82%|████████▏ | 8184/10000 [00:10<00:02, 795.60it/s]
 83%|████████▎ | 8266/10000 [00:10<00:02, 801.06it/s]
 83%|████████▎ | 8347/10000 [00:10<00:02, 797.27it/s]
 84%|████████▍ | 8427/10000 [00:10<00:01, 794.30it/s]
 85%|████████▌ | 8507/10000 [00:10<00:01, 790.25it/s]
 86%|████████▌ | 8587/10000 [00:10<00:01, 790.93it/s]
 87%|████████▋ | 8667/10000 [00:10<00:01, 791.47it/s]
 87%|████████▋ | 8747/10000 [00:11<00:01, 792.64it/s]
 88%|████████▊ | 8827/10000 [00:11<00:01, 793.65it/s]
 89%|████████▉ | 8908/10000 [00:11<00:01, 798.37it/s]
 90%|████████▉ | 8989/10000 [00:11<00:01, 800.95it/s]
 91%|█████████ | 9070/10000 [00:11<00:01, 800.38it/s]
 92%|█████████▏| 9152/10000 [00:11<00:01, 804.64it/s]
 92%|█████████▏| 9233/10000 [00:11<00:00, 801.85it/s]
 93%|█████████▎| 9314/10000 [00:11<00:00, 803.86it/s]
 94%|█████████▍| 9395/10000 [00:11<00:00, 802.79it/s]
 95%|█████████▍| 9476/10000 [00:11<00:00, 804.63it/s]
 96%|█████████▌| 9557/10000 [00:12<00:00, 802.73it/s]
 96%|█████████▋| 9638/10000 [00:12<00:00, 800.25it/s]
 97%|█████████▋| 9719/10000 [00:12<00:00, 799.25it/s]
 98%|█████████▊| 9801/10000 [00:12<00:00, 804.38it/s]
 99%|█████████▉| 9882/10000 [00:12<00:00, 803.90it/s]
100%|█████████▉| 9963/10000 [00:12<00:00, 804.60it/s]
100%|██████████| 10000/10000 [00:13<00:00, 738.33it/s]
Define new variables

Defining new variables...:   0%|          | 0/20 [00:00<?, ?it/s]
Defining new variables...:   5%|▌         | 1/20 [00:00<00:05,  3.37it/s]
Defining new variables...:  10%|█         | 2/20 [00:00<00:05,  3.19it/s]
Defining new variables...:  15%|█▌        | 3/20 [00:00<00:05,  3.13it/s]
Defining new variables...:  20%|██        | 4/20 [00:01<00:05,  3.11it/s]
Defining new variables...:  25%|██▌       | 5/20 [00:01<00:04,  3.06it/s]
Defining new variables...:  30%|███       | 6/20 [00:01<00:04,  3.06it/s]
Defining new variables...:  35%|███▌      | 7/20 [00:02<00:04,  3.06it/s]
Defining new variables...:  40%|████      | 8/20 [00:02<00:03,  3.05it/s]
Defining new variables...:  45%|████▌     | 9/20 [00:02<00:03,  3.04it/s]
Defining new variables...:  50%|█████     | 10/20 [00:03<00:03,  3.03it/s]
Defining new variables...:  55%|█████▌    | 11/20 [00:03<00:02,  3.04it/s]
Defining new variables...:  60%|██████    | 12/20 [00:03<00:02,  3.04it/s]
Defining new variables...:  65%|██████▌   | 13/20 [00:04<00:02,  3.04it/s]
Defining new variables...:  70%|███████   | 14/20 [00:04<00:01,  3.04it/s]
Defining new variables...:  75%|███████▌  | 15/20 [00:04<00:01,  3.04it/s]
Defining new variables...:  80%|████████  | 16/20 [00:05<00:01,  3.04it/s]
Defining new variables...:  85%|████████▌ | 17/20 [00:05<00:00,  3.04it/s]
Defining new variables...:  90%|█████████ | 18/20 [00:05<00:00,  3.04it/s]
Defining new variables...:  95%|█████████▌| 19/20 [00:06<00:00,  3.04it/s]
Defining new variables...: 100%|██████████| 20/20 [00:06<00:00,  3.02it/s]
Defining new variables...: 100%|██████████| 20/20 [00:17<00:00,  1.13it/s]
File nested_downtown_20.dat has been created.

Definition of the nest.

mu_asian = Beta('mu_asian', 1.0, 1.0, None, 0)
nest_asian = OneNestForNestedLogit(
    nest_param=mu_asian, list_of_alternatives=asian, name='asian'
)
nests = NestsForNestedLogit(
    choice_set=all_alternatives,
    tuple_of_nests=(nest_asian,),
)

The following elements do not appear in any nest and are assumed each to be alone in a separate nest: {2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 19, 20, 21, 22, 23, 24, 25, 26, 28, 29, 30, 32, 35, 36, 38, 39, 41, 42, 43, 44, 46, 48, 49, 52, 53, 54, 56, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 69, 71, 73, 74, 75, 77, 82, 83, 84, 85, 86, 88, 90, 93, 95, 96, 97, 99}. If it is not the intention, check the assignment of alternatives to nests.

logprob = the_model_generation.get_nested_logit(nests)

the_biogeme = bio.BIOGEME(biogeme_database, logprob)
the_biogeme.modelName = MODEL_NAME

Biogeme parameters read from biogeme.toml.

Calculate the null log likelihood for reporting.

the_biogeme.calculate_null_loglikelihood(
    {i: 1 for i in range(context.total_sample_size)}
)

np.float64(-29957.32273553647)

Estimate the parameters

results = the_biogeme.estimate(recycle=False)

As the model is not too complex, we activate the calculation of second derivatives. If you want to change it, change the name of the algorithm in the TOML file from "automatic" to "simple_bounds"
*** Initial values of the parameters are obtained from the file __nested_downtown_20.iter
Cannot read file __nested_downtown_20.iter. Statement is ignored.
As the model is not too complex, we activate the calculation of second derivatives. If you want to change it, change the name of the algorithm in the TOML file from "automatic" to "simple_bounds"
Optimization algorithm: hybrid Newton/BFGS with simple bounds [simple_bounds]
** Optimization: Newton with trust region for simple bounds
Iter.    beta_chinese  beta_ethiopian     beta_french     beta_indian   beta_japanese     beta_korean   beta_lebanese   beta_log_dist    beta_mexican      beta_price     beta_rating        mu_asian     Function    Relgrad   Radius      Rho
    0          -0.094          -0.044           0.023           -0.12            0.12          -0.067          0.0027              -1            0.68           -0.51            0.47             1.2      2.4e+04      0.081       10     0.92   ++
    1            0.18            0.52            0.83            0.39               1            0.24            0.78           -0.61             1.2            -0.4            0.76             1.4      2.3e+04      0.024    1e+02        1   ++
    2            0.52            0.51            0.76            0.83             1.1            0.56            0.72           -0.62             1.2           -0.42            0.79             1.7      2.3e+04      0.012    1e+03      1.1   ++
    3            0.63             0.5            0.75            0.91             1.2            0.64            0.72            -0.6             1.2           -0.41            0.77             1.9      2.3e+04     0.0032    1e+04      1.2   ++
    4            0.69             0.5            0.75            0.97             1.2            0.69            0.71           -0.59             1.2           -0.41            0.76               2      2.3e+04    0.00042    1e+05      1.1   ++
    5            0.69             0.5            0.75            0.97             1.2            0.69            0.71           -0.59             1.2           -0.41            0.76               2      2.3e+04    6.3e-06    1e+05        1   ++
Results saved in file nested_downtown_20.html
Results saved in file nested_downtown_20.pickle

print(results.short_summary())

Results for model nested_downtown_20
Nbr of parameters:              12
Sample size:                    10000
Excluded data:                  0
Null log likelihood:            -29957.32
Final log likelihood:           -22979.19
Likelihood ratio test (null):           13956.26
Rho square (null):                      0.233
Rho bar square (null):                  0.233
Akaike Information Criterion:   45982.39
Bayesian Information Criterion: 46068.91

estimated_parameters = results.get_estimated_parameters()
estimated_parameters

	Value	Rob. Std err	Rob. t-test
beta_chinese	0.700229	0.071300	9.820906
beta_ethiopian	0.503377	0.040232	12.511976
beta_french	0.744952	0.048852	15.249200
beta_indian	0.976977	0.063607	15.359560
beta_japanese	1.236949	0.054294	22.782394
beta_korean	0.696263	0.061596	11.303638
beta_lebanese	0.713084	0.049902	14.289703
beta_log_dist	-0.588648	0.012729	-46.243923
beta_mexican	1.204193	0.029153	41.306497
beta_price	-0.404956	0.012275	-32.989608
beta_rating	0.763591	0.015256	50.052379
mu_asian	2.019067	0.059071	34.180512

df, msg = compare(estimated_parameters)

print(df)

              Name  True Value  Estimated Value    T-Test
    beta_rating        0.75         0.763591 -0.890885
     beta_price       -0.40        -0.404956  0.403743
   beta_chinese        0.75         0.700229  0.698055
  beta_japanese        1.25         1.236949  0.240375
    beta_korean        0.75         0.696263  0.872402
    beta_indian        1.00         0.976977  0.361951
    beta_french        0.75         0.744952  0.103343
   beta_mexican        1.25         1.204193  1.571277
  beta_lebanese        0.75         0.713084  0.739766
 beta_ethiopian        0.50         0.503377 -0.083940
 beta_log_dist       -0.60        -0.588648 -0.891786
      mu_asian        2.00         2.019067 -0.322789

print(msg)

Parameters not estimated: ['mu_downtown']

Total running time of the script: (1 minutes 16.858 seconds)

Gallery generated by Sphinx-Gallery