Choice model with a latent variable: maximum likelihood estimation

Mixture of logit. Measurement equation for the indicators. Maximum likelihood (full information) estimation.

author:

Michel Bierlaire, EPFL

date:

Thu Apr 13 18:02:26 2023

import sys
import biogeme.biogeme_logging as blog
import biogeme.biogeme as bio
from biogeme.exceptions import BiogemeError
from biogeme import models
import biogeme.distributions as dist
import biogeme.results as res
from biogeme.expressions import (
    Beta,
    log,
    RandomVariable,
    Integrate,
    Elem,
    bioNormalCdf,
    exp,
)
from read_or_estimate import read_or_estimate

from biogeme.data.optima import (
    read_data,
    age_65_more,
    moreThanOneCar,
    moreThanOneBike,
    individualHouse,
    male,
    haveChildren,
    haveGA,
    highEducation,
    WaitingTimePT,
    Envir01,
    Envir02,
    Envir03,
    Mobil11,
    Mobil14,
    Mobil16,
    Mobil17,
    Choice,
    TimePT_scaled,
    TimeCar_scaled,
    MarginalCostPT_scaled,
    CostCarCHF_scaled,
    distance_km_scaled,
    PurpHWH,
    PurpOther,
    ScaledIncome,
)


logger = blog.get_screen_logger(level=blog.INFO)
logger.info('Example b05latent_choice_full.py')
Example b05latent_choice_full.py

Read the estimates from the structural equation estimation.

MODELNAME = 'b02one_latent_ordered'
try:
    struct_results = res.bioResults(pickle_file=f'saved_results/{MODELNAME}.pickle')
except BiogemeError:
    print(
        f'Run first the script {MODELNAME}.py in order to generate the '
        f'file {MODELNAME}.pickle, and move it to the directory saved_results'
    )
    sys.exit()
struct_betas = struct_results.get_beta_values()

Coefficients

coef_intercept = Beta('coef_intercept', struct_betas['coef_intercept'], None, None, 0)
coef_age_65_more = Beta(
    'coef_age_65_more', struct_betas['coef_age_65_more'], None, None, 0
)
coef_haveGA = Beta('coef_haveGA', struct_betas['coef_haveGA'], None, None, 0)

coef_moreThanOneCar = Beta(
    'coef_moreThanOneCar', struct_betas['coef_moreThanOneCar'], None, None, 0
)
coef_moreThanOneBike = Beta(
    'coef_moreThanOneBike', struct_betas['coef_moreThanOneBike'], None, None, 0
)
coef_individualHouse = Beta(
    'coef_individualHouse', struct_betas['coef_individualHouse'], None, None, 0
)
coef_male = Beta('coef_male', struct_betas['coef_male'], None, None, 0)
coef_haveChildren = Beta(
    'coef_haveChildren', struct_betas['coef_haveChildren'], None, None, 0
)
coef_highEducation = Beta(
    'coef_highEducation', struct_betas['coef_highEducation'], None, None, 0
)

Latent variable: structural equation

Define a random parameter, normally distributed, designed to be used for numerical integration

omega = RandomVariable('omega')
density = dist.normalpdf(omega)
sigma_s = Beta('sigma_s', 1, None, None, 0)

Piecewise linear specification for income.

thresholds = [None, 4, 6, 8, 10, None]
betas_thresholds = [
    Beta(
        'beta_ScaledIncome_minus_inf_4',
        struct_betas['beta_ScaledIncome_minus_inf_4'],
        None,
        None,
        0,
    ),
    Beta(
        'beta_ScaledIncome_4_6',
        struct_betas['beta_ScaledIncome_4_6'],
        None,
        None,
        0,
    ),
    Beta(
        'beta_ScaledIncome_6_8',
        struct_betas['beta_ScaledIncome_6_8'],
        None,
        None,
        0,
    ),
    Beta(
        'beta_ScaledIncome_8_10',
        struct_betas['beta_ScaledIncome_8_10'],
        None,
        None,
        0,
    ),
    Beta(
        'beta_ScaledIncome_10_inf',
        struct_betas['beta_ScaledIncome_10_inf'],
        None,
        None,
        0,
    ),
]

formula_income = models.piecewise_formula(
    variable=ScaledIncome,
    thresholds=thresholds,
    betas=betas_thresholds,
)

CARLOVERS = (
    coef_intercept
    + coef_age_65_more * age_65_more
    + formula_income
    + coef_moreThanOneCar * moreThanOneCar
    + coef_moreThanOneBike * moreThanOneBike
    + coef_individualHouse * individualHouse
    + coef_male * male
    + coef_haveChildren * haveChildren
    + coef_haveGA * haveGA
    + coef_highEducation * highEducation
    + sigma_s * omega
)

Measurement equations.

Intercepts.

INTER_Envir01 = Beta('INTER_Envir01', 0, None, None, 1)
INTER_Envir02 = Beta('INTER_Envir02', 0, None, None, 0)
INTER_Envir03 = Beta('INTER_Envir03', 0, None, None, 0)
INTER_Mobil11 = Beta('INTER_Mobil11', 0, None, None, 0)
INTER_Mobil14 = Beta('INTER_Mobil14', 0, None, None, 0)
INTER_Mobil16 = Beta('INTER_Mobil16', 0, None, None, 0)
INTER_Mobil17 = Beta('INTER_Mobil17', 0, None, None, 0)

Coefficients.

B_Envir01_F1 = Beta('B_Envir01_F1', -1, None, None, 1)
B_Envir02_F1 = Beta('B_Envir02_F1', -1, None, None, 0)
B_Envir03_F1 = Beta('B_Envir03_F1', 1, None, None, 0)
B_Mobil11_F1 = Beta('B_Mobil11_F1', 1, None, None, 0)
B_Mobil14_F1 = Beta('B_Mobil14_F1', 1, None, None, 0)
B_Mobil16_F1 = Beta('B_Mobil16_F1', 1, None, None, 0)
B_Mobil17_F1 = Beta('B_Mobil17_F1', 1, None, None, 0)

Linear models.

MODEL_Envir01 = INTER_Envir01 + B_Envir01_F1 * CARLOVERS
MODEL_Envir02 = INTER_Envir02 + B_Envir02_F1 * CARLOVERS
MODEL_Envir03 = INTER_Envir03 + B_Envir03_F1 * CARLOVERS
MODEL_Mobil11 = INTER_Mobil11 + B_Mobil11_F1 * CARLOVERS
MODEL_Mobil14 = INTER_Mobil14 + B_Mobil14_F1 * CARLOVERS
MODEL_Mobil16 = INTER_Mobil16 + B_Mobil16_F1 * CARLOVERS
MODEL_Mobil17 = INTER_Mobil17 + B_Mobil17_F1 * CARLOVERS

Scale parameters

SIGMA_STAR_Envir01 = Beta('SIGMA_STAR_Envir01', 1, 1.0e-5, None, 1)
SIGMA_STAR_Envir02 = Beta('SIGMA_STAR_Envir02', 1, 1.0e-5, None, 0)
SIGMA_STAR_Envir03 = Beta('SIGMA_STAR_Envir03', 1, 1.0e-5, None, 0)
SIGMA_STAR_Mobil11 = Beta('SIGMA_STAR_Mobil11', 1, 1.0e-5, None, 0)
SIGMA_STAR_Mobil14 = Beta('SIGMA_STAR_Mobil14', 1, 1.0e-5, None, 0)
SIGMA_STAR_Mobil16 = Beta('SIGMA_STAR_Mobil16', 1, 1.0e-5, None, 0)
SIGMA_STAR_Mobil17 = Beta('SIGMA_STAR_Mobil17', 1, 1.0e-5, None, 0)

Symmetric thresholds.

delta_1 = Beta('delta_1', 0.1, 1.0e-5, None, 0)
delta_2 = Beta('delta_2', 0.2, 1.0e-5, None, 0)
tau_1 = -delta_1 - delta_2
tau_2 = -delta_1
tau_3 = delta_1
tau_4 = delta_1 + delta_2

Ordered probit models.

Envir01_tau_1 = (tau_1 - MODEL_Envir01) / SIGMA_STAR_Envir01
Envir01_tau_2 = (tau_2 - MODEL_Envir01) / SIGMA_STAR_Envir01
Envir01_tau_3 = (tau_3 - MODEL_Envir01) / SIGMA_STAR_Envir01
Envir01_tau_4 = (tau_4 - MODEL_Envir01) / SIGMA_STAR_Envir01
IndEnvir01 = {
    1: bioNormalCdf(Envir01_tau_1),
    2: bioNormalCdf(Envir01_tau_2) - bioNormalCdf(Envir01_tau_1),
    3: bioNormalCdf(Envir01_tau_3) - bioNormalCdf(Envir01_tau_2),
    4: bioNormalCdf(Envir01_tau_4) - bioNormalCdf(Envir01_tau_3),
    5: 1 - bioNormalCdf(Envir01_tau_4),
    6: 1.0,
    -1: 1.0,
    -2: 1.0,
}

P_Envir01 = Elem(IndEnvir01, Envir01)

Envir02_tau_1 = (tau_1 - MODEL_Envir02) / SIGMA_STAR_Envir02
Envir02_tau_2 = (tau_2 - MODEL_Envir02) / SIGMA_STAR_Envir02
Envir02_tau_3 = (tau_3 - MODEL_Envir02) / SIGMA_STAR_Envir02
Envir02_tau_4 = (tau_4 - MODEL_Envir02) / SIGMA_STAR_Envir02
IndEnvir02 = {
    1: bioNormalCdf(Envir02_tau_1),
    2: bioNormalCdf(Envir02_tau_2) - bioNormalCdf(Envir02_tau_1),
    3: bioNormalCdf(Envir02_tau_3) - bioNormalCdf(Envir02_tau_2),
    4: bioNormalCdf(Envir02_tau_4) - bioNormalCdf(Envir02_tau_3),
    5: 1 - bioNormalCdf(Envir02_tau_4),
    6: 1.0,
    -1: 1.0,
    -2: 1.0,
}

P_Envir02 = Elem(IndEnvir02, Envir02)

Envir03_tau_1 = (tau_1 - MODEL_Envir03) / SIGMA_STAR_Envir03
Envir03_tau_2 = (tau_2 - MODEL_Envir03) / SIGMA_STAR_Envir03
Envir03_tau_3 = (tau_3 - MODEL_Envir03) / SIGMA_STAR_Envir03
Envir03_tau_4 = (tau_4 - MODEL_Envir03) / SIGMA_STAR_Envir03
IndEnvir03 = {
    1: bioNormalCdf(Envir03_tau_1),
    2: bioNormalCdf(Envir03_tau_2) - bioNormalCdf(Envir03_tau_1),
    3: bioNormalCdf(Envir03_tau_3) - bioNormalCdf(Envir03_tau_2),
    4: bioNormalCdf(Envir03_tau_4) - bioNormalCdf(Envir03_tau_3),
    5: 1 - bioNormalCdf(Envir03_tau_4),
    6: 1.0,
    -1: 1.0,
    -2: 1.0,
}

P_Envir03 = Elem(IndEnvir03, Envir03)

Mobil11_tau_1 = (tau_1 - MODEL_Mobil11) / SIGMA_STAR_Mobil11
Mobil11_tau_2 = (tau_2 - MODEL_Mobil11) / SIGMA_STAR_Mobil11
Mobil11_tau_3 = (tau_3 - MODEL_Mobil11) / SIGMA_STAR_Mobil11
Mobil11_tau_4 = (tau_4 - MODEL_Mobil11) / SIGMA_STAR_Mobil11
IndMobil11 = {
    1: bioNormalCdf(Mobil11_tau_1),
    2: bioNormalCdf(Mobil11_tau_2) - bioNormalCdf(Mobil11_tau_1),
    3: bioNormalCdf(Mobil11_tau_3) - bioNormalCdf(Mobil11_tau_2),
    4: bioNormalCdf(Mobil11_tau_4) - bioNormalCdf(Mobil11_tau_3),
    5: 1 - bioNormalCdf(Mobil11_tau_4),
    6: 1.0,
    -1: 1.0,
    -2: 1.0,
}

P_Mobil11 = Elem(IndMobil11, Mobil11)

Mobil14_tau_1 = (tau_1 - MODEL_Mobil14) / SIGMA_STAR_Mobil14
Mobil14_tau_2 = (tau_2 - MODEL_Mobil14) / SIGMA_STAR_Mobil14
Mobil14_tau_3 = (tau_3 - MODEL_Mobil14) / SIGMA_STAR_Mobil14
Mobil14_tau_4 = (tau_4 - MODEL_Mobil14) / SIGMA_STAR_Mobil14
IndMobil14 = {
    1: bioNormalCdf(Mobil14_tau_1),
    2: bioNormalCdf(Mobil14_tau_2) - bioNormalCdf(Mobil14_tau_1),
    3: bioNormalCdf(Mobil14_tau_3) - bioNormalCdf(Mobil14_tau_2),
    4: bioNormalCdf(Mobil14_tau_4) - bioNormalCdf(Mobil14_tau_3),
    5: 1 - bioNormalCdf(Mobil14_tau_4),
    6: 1.0,
    -1: 1.0,
    -2: 1.0,
}

P_Mobil14 = Elem(IndMobil14, Mobil14)

Mobil16_tau_1 = (tau_1 - MODEL_Mobil16) / SIGMA_STAR_Mobil16
Mobil16_tau_2 = (tau_2 - MODEL_Mobil16) / SIGMA_STAR_Mobil16
Mobil16_tau_3 = (tau_3 - MODEL_Mobil16) / SIGMA_STAR_Mobil16
Mobil16_tau_4 = (tau_4 - MODEL_Mobil16) / SIGMA_STAR_Mobil16
IndMobil16 = {
    1: bioNormalCdf(Mobil16_tau_1),
    2: bioNormalCdf(Mobil16_tau_2) - bioNormalCdf(Mobil16_tau_1),
    3: bioNormalCdf(Mobil16_tau_3) - bioNormalCdf(Mobil16_tau_2),
    4: bioNormalCdf(Mobil16_tau_4) - bioNormalCdf(Mobil16_tau_3),
    5: 1 - bioNormalCdf(Mobil16_tau_4),
    6: 1.0,
    -1: 1.0,
    -2: 1.0,
}

P_Mobil16 = Elem(IndMobil16, Mobil16)

Mobil17_tau_1 = (tau_1 - MODEL_Mobil17) / SIGMA_STAR_Mobil17
Mobil17_tau_2 = (tau_2 - MODEL_Mobil17) / SIGMA_STAR_Mobil17
Mobil17_tau_3 = (tau_3 - MODEL_Mobil17) / SIGMA_STAR_Mobil17
Mobil17_tau_4 = (tau_4 - MODEL_Mobil17) / SIGMA_STAR_Mobil17
IndMobil17 = {
    1: bioNormalCdf(Mobil17_tau_1),
    2: bioNormalCdf(Mobil17_tau_2) - bioNormalCdf(Mobil17_tau_1),
    3: bioNormalCdf(Mobil17_tau_3) - bioNormalCdf(Mobil17_tau_2),
    4: bioNormalCdf(Mobil17_tau_4) - bioNormalCdf(Mobil17_tau_3),
    5: 1 - bioNormalCdf(Mobil17_tau_4),
    6: 1.0,
    -1: 1.0,
    -2: 1.0,
}

P_Mobil17 = Elem(IndMobil17, Mobil17)

Choice model Read the estimates from the sequential estimation, and use them as starting values

MODELNAME = 'b04latent_choice_seq'
try:
    choice_results = res.bioResults(pickle_file=f'saved_results/{MODELNAME}.pickle')
except BiogemeError:
    print(
        f'Run first the script {MODELNAME}.py in order to generate the '
        f'file {MODELNAME}.pickle, and move it to the directory saved_results'
    )
    sys.exit()
choice_betas = choice_results.get_beta_values()

Parameters to estimate. We use the previously estimated values as starting points.

ASC_CAR = Beta('ASC_CAR', choice_betas['ASC_CAR'], None, None, 0)
ASC_PT = Beta('ASC_PT', 0, None, None, 1)
ASC_SM = Beta('ASC_SM', choice_betas['ASC_SM'], None, None, 0)
BETA_COST_HWH = Beta('BETA_COST_HWH', choice_betas['BETA_COST_HWH'], None, None, 0)
BETA_COST_OTHER = Beta(
    'BETA_COST_OTHER', choice_betas['BETA_COST_OTHER'], None, None, 0
)
BETA_DIST = Beta('BETA_DIST', choice_betas['BETA_DIST'], None, None, 0)
BETA_TIME_CAR_REF = Beta(
    'BETA_TIME_CAR_REF', choice_betas['BETA_TIME_CAR_REF'], None, 0, 0
)
BETA_TIME_CAR_CL = Beta(
    'BETA_TIME_CAR_CL', choice_betas['BETA_TIME_CAR_CL'], None, None, 0
)
BETA_TIME_PT_REF = Beta(
    'BETA_TIME_PT_REF', choice_betas['BETA_TIME_PT_REF'], None, 0, 0
)
BETA_TIME_PT_CL = Beta(
    'BETA_TIME_PT_CL', choice_betas['BETA_TIME_PT_CL'], None, None, 0
)
BETA_WAITING_TIME = Beta(
    'BETA_WAITING_TIME', choice_betas['BETA_WAITING_TIME'], None, None, 0
)

Definition of the utility functions.

BETA_TIME_PT = BETA_TIME_PT_REF * exp(BETA_TIME_PT_CL * CARLOVERS)

V0 = (
    ASC_PT
    + BETA_TIME_PT * TimePT_scaled
    + BETA_WAITING_TIME * WaitingTimePT
    + BETA_COST_HWH * MarginalCostPT_scaled * PurpHWH
    + BETA_COST_OTHER * MarginalCostPT_scaled * PurpOther
)

BETA_TIME_CAR = BETA_TIME_CAR_REF * exp(BETA_TIME_CAR_CL * CARLOVERS)

V1 = (
    ASC_CAR
    + BETA_TIME_CAR * TimeCar_scaled
    + BETA_COST_HWH * CostCarCHF_scaled * PurpHWH
    + BETA_COST_OTHER * CostCarCHF_scaled * PurpOther
)

V2 = ASC_SM + BETA_DIST * distance_km_scaled

Associate utility functions with the numbering of alternatives.

V = {0: V0, 1: V1, 2: V2}

Conditional on omega, we have a logit model (called the kernel) for the choice.

condprob = models.logit(V, None, Choice)

Conditional on omega, we have the product of ordered probit for the indicators.

condlike = (
    P_Envir01
    * P_Envir02
    * P_Envir03
    * P_Mobil11
    * P_Mobil14
    * P_Mobil16
    * P_Mobil17
    * condprob
)

We integrate over omega using numerical integration.

loglike = log(Integrate(condlike * density, 'omega'))

Read data

database = read_data()

Create the Biogeme object

the_biogeme = bio.BIOGEME(database, loglike)
the_biogeme.modelName = 'b05latent_choice_full'
Biogeme parameters read from biogeme.toml.

If estimation results are saved on file, we read them to speed up the process. If not, we estimate the parameters.

results = read_or_estimate(the_biogeme=the_biogeme, directory='saved_results')
print(f'Estimated betas: {len(results.data.betaValues)}')
print(f'Final log likelihood: {results.data.logLike:.3f}')
print(f'Output file: {results.data.htmlFileName}')
Estimated betas: 45
Final log likelihood: -18384.429
Output file: b05latent_choice_full.html
results.get_estimated_parameters()
Value Rob. Std err Rob. t-test Rob. p-value
ASC_CAR 0.856592 0.098942 8.657523 0.000000e+00
ASC_SM 1.445479 0.206129 7.012503 2.341016e-12
BETA_COST_HWH -1.502301 0.163683 -9.178085 0.000000e+00
BETA_COST_OTHER -0.702120 0.087527 -8.021744 1.110223e-15
BETA_DIST -3.355619 0.263321 -12.743466 0.000000e+00
BETA_TIME_CAR_CL -1.559915 0.105751 -14.750773 0.000000e+00
BETA_TIME_CAR_REF -10.510488 0.576243 -18.239692 0.000000e+00
BETA_TIME_PT_CL -1.444572 0.109790 -13.157650 0.000000e+00
BETA_TIME_PT_REF -2.983441 0.264355 -11.285735 0.000000e+00
BETA_WAITING_TIME -0.028964 0.006157 -4.704311 2.547243e-06
B_Envir02_F1 -0.465197 0.035462 -13.118346 0.000000e+00
B_Envir03_F1 0.487628 0.034741 14.036236 0.000000e+00
B_Mobil11_F1 0.581722 0.038725 15.021999 0.000000e+00
B_Mobil14_F1 0.575294 0.034105 16.868154 0.000000e+00
B_Mobil16_F1 0.525909 0.033801 15.558837 0.000000e+00
B_Mobil17_F1 0.516376 0.036094 14.306431 0.000000e+00
INTER_Envir02 0.456450 0.034929 13.067964 0.000000e+00
INTER_Envir03 -0.363858 0.032022 -11.362893 0.000000e+00
INTER_Mobil11 0.402832 0.032632 12.344700 0.000000e+00
INTER_Mobil14 -0.170776 0.028712 -5.947816 2.717439e-09
INTER_Mobil16 0.141787 0.027891 5.083523 3.704968e-07
INTER_Mobil17 0.133760 0.029849 4.481276 7.419791e-06
SIGMA_STAR_Envir02 0.901791 0.032767 27.521397 0.000000e+00
SIGMA_STAR_Envir03 0.843870 0.030282 27.866677 0.000000e+00
SIGMA_STAR_Mobil11 0.880731 0.033273 26.470005 0.000000e+00
SIGMA_STAR_Mobil14 0.752138 0.027836 27.020714 0.000000e+00
SIGMA_STAR_Mobil16 0.862689 0.031725 27.192909 0.000000e+00
SIGMA_STAR_Mobil17 0.864560 0.032617 26.506670 0.000000e+00
beta_ScaledIncome_10_inf 0.115310 0.036185 3.186681 1.439155e-03
beta_ScaledIncome_4_6 -0.271862 0.108791 -2.498944 1.245640e-02
beta_ScaledIncome_6_8 0.305166 0.126603 2.410421 1.593412e-02
beta_ScaledIncome_8_10 -0.644381 0.148066 -4.351995 1.349043e-05
beta_ScaledIncome_minus_inf_4 0.141625 0.059384 2.384910 1.708328e-02
coef_age_65_more 0.032912 0.073928 0.445185 6.561862e-01
coef_haveChildren -0.047505 0.053153 -0.893743 3.714596e-01
coef_haveGA -0.658555 0.086478 -7.615320 2.620126e-14
coef_highEducation -0.222239 0.054374 -4.087251 4.365139e-05
coef_individualHouse -0.084054 0.053404 -1.573936 1.155022e-01
coef_intercept 0.360533 0.168440 2.140427 3.232031e-02
coef_male 0.070118 0.048409 1.448441 1.474937e-01
coef_moreThanOneBike -0.351012 0.061026 -5.751890 8.825124e-09
coef_moreThanOneCar 0.669274 0.058122 11.514993 0.000000e+00
delta_1 0.321361 0.009874 32.546698 0.000000e+00
delta_2 0.969100 0.026404 36.703366 0.000000e+00
sigma_s 0.818288 0.039082 20.937937 0.000000e+00


Total running time of the script: (0 minutes 0.409 seconds)

Gallery generated by Sphinx-Gallery