Note

Go to the end to download the full example code.

Choice model with the latent variable: maximum likelihood estimation

Mixture of logit. Measurement equation for the indicators. Maximum likelihood (full information) estimation.

author:: Michel Bierlaire, EPFL
date:: Fri Apr 14 10:07:43 2023

import sys
from functools import reduce
import biogeme.biogeme_logging as blog
import biogeme.biogeme as bio
import biogeme.exceptions as excep
import biogeme.distributions as dist
import biogeme.results as res
from biogeme import models
from biogeme.expressions import (
    Beta,
    Variable,
    log,
    RandomVariable,
    Integrate,
    Elem,
    bioNormalCdf,
    exp,
)

from read_or_estimate import read_or_estimate

from biogeme.data.optima import (
    read_data,
    male,
    age,
    haveChildren,
    highEducation,
    childCenter,
    childSuburb,
    SocioProfCat,
    WaitingTimePT,
    Choice,
    TimePT_scaled,
    TimeCar_scaled,
    MarginalCostPT_scaled,
    CostCarCHF_scaled,
    distance_km_scaled,
    PurpHWH,
    PurpOther,
)

logger = blog.get_screen_logger(level=blog.INFO)
logger.info('Example m03_simultaneous_estimation.py')

Example m03_simultaneous_estimation.py

Read the estimates from the structural equation estimation.

MODELNAME = 'm01_latent_variable'
try:
    struct_results = res.bioResults(pickle_file=f'saved_results/{MODELNAME}.pickle')
except excep.BiogemeError:
    print(
        f'Run first the script {MODELNAME}.py in order to generate the '
        f'file {MODELNAME}.pickle, and move it to the directory saved_results'
    )
    sys.exit()
struct_betas = struct_results.get_beta_values()

Coefficients

coef_intercept = Beta('coef_intercept', struct_betas['coef_intercept'], None, None, 0)
coef_age_30_less = Beta(
    'coef_age_30_less', struct_betas['coef_age_30_less'], None, None, 0
)
coef_haveChildren = Beta(
    'coef_haveChildren', struct_betas['coef_haveChildren'], None, None, 0
)
coef_highEducation = Beta(
    'coef_highEducation', struct_betas['coef_highEducation'], None, None, 0
)
coef_artisans = Beta('coef_artisans', struct_betas['coef_artisans'], None, None, 0)
coef_employees = Beta('coef_employees', struct_betas['coef_employees'], None, None, 0)
coef_male = Beta('coef_male', struct_betas['coef_male'], None, None, 0)
coef_child_center = Beta(
    'coef_child_center', struct_betas['coef_child_center'], None, None, 0
)
coef_child_suburb = Beta(
    'coef_child_suburb', struct_betas['coef_child_suburb'], None, None, 0
)

Latent variable: structural equation

Define a random parameter, normally distributed, designed to be used for numerical integration

omega = RandomVariable('omega')
density = dist.normalpdf(omega)
sigma_s = Beta('sigma_s', 1, None, None, 0)

ACTIVELIFE = (
    coef_intercept
    + coef_child_center * childCenter
    + coef_child_suburb * childSuburb
    + coef_highEducation * highEducation
    + coef_artisans * (SocioProfCat == 5)
    + coef_employees * (SocioProfCat == 6)
    + coef_age_30_less * (age <= 30)
    + coef_male * male
    + coef_haveChildren * haveChildren
    + sigma_s * omega
)

Measurement equations

indicators = [
    'ResidCh01',
    'ResidCh04',
    'ResidCh05',
    'ResidCh06',
    'LifSty07',
    'LifSty10',
]

We define the intercept parameters. The first one is normalized to 0.

inter = {k: Beta(f'inter_{k}', 0, None, None, 0) for k in indicators[1:]}
inter[indicators[0]] = Beta(f'INTER_{indicators[0]}', 0, None, None, 1)

We define the coefficients. The first one is normalized to 1.

coefficients = {k: Beta(f'coeff_{k}', 0, None, None, 0) for k in indicators[1:]}
coefficients[indicators[0]] = Beta(f'B_{indicators[0]}', 1, None, None, 1)

We define the measurement equations for each indicator

linear_models = {k: inter[k] + coefficients[k] * ACTIVELIFE for k in indicators}

We define the scale parameters of the error terms.

sigma_star = {k: Beta(f'sigma_star_{k}', 1, 1.0e-5, None, 0) for k in indicators[1:]}
sigma_star[indicators[0]] = Beta(f'sigma_star_{indicators[0]}', 1, None, None, 1)

Symmetric threshold.

delta_1 = Beta('delta_1', 0.1, 1.0e-5, None, 0)
delta_2 = Beta('delta_2', 0.2, 1.0e-5, None, 0)
tau_1 = -delta_1 - delta_2
tau_2 = -delta_1
tau_3 = delta_1
tau_4 = delta_1 + delta_2

Ordered probit models.

tau_1_residual = {k: (tau_1 - linear_models[k]) / sigma_star[k] for k in indicators}
tau_2_residual = {k: (tau_2 - linear_models[k]) / sigma_star[k] for k in indicators}
tau_3_residual = {k: (tau_3 - linear_models[k]) / sigma_star[k] for k in indicators}
tau_4_residual = {k: (tau_4 - linear_models[k]) / sigma_star[k] for k in indicators}
dict_prob_indicators = {
    k: {
        1: bioNormalCdf(tau_1_residual[k]),
        2: bioNormalCdf(tau_2_residual[k]) - bioNormalCdf(tau_1_residual[k]),
        3: bioNormalCdf(tau_3_residual[k]) - bioNormalCdf(tau_2_residual[k]),
        4: bioNormalCdf(tau_4_residual[k]) - bioNormalCdf(tau_3_residual[k]),
        5: 1 - bioNormalCdf(tau_4_residual[k]),
        6: 1.0,
        -1: 1.0,
        -2: 1.0,
    }
    for k in indicators
}

Product of the likelihood of the indicators.

prob_indicators = reduce(
    lambda x, y: x * Elem(dict_prob_indicators[y], Variable(y)),
    indicators,
    Elem(dict_prob_indicators[indicators[0]], Variable(indicators[0])),
)

Choice model Read the estimates from the sequential estimation, and use them as starting values

MODELNAME = 'm02_sequential_estimation'
try:
    choice_results = res.bioResults(pickle_file=f'saved_results/{MODELNAME}.pickle')
except excep.BiogemeError:
    print(
        f'Run first the script {MODELNAME}.py in order to generate the '
        f'file {MODELNAME}.pickle, and move it to the directory saved_results'
    )
    sys.exit()
choice_betas = choice_results.get_beta_values()

ASC_CAR = Beta('ASC_CAR', choice_betas['ASC_CAR'], None, None, 0)
ASC_PT = Beta('ASC_PT', 0, None, None, 1)
ASC_SM = Beta('ASC_SM', choice_betas['ASC_SM'], None, None, 0)
BETA_COST_HWH = Beta('BETA_COST_HWH', choice_betas['BETA_COST_HWH'], None, None, 0)
BETA_COST_OTHER = Beta(
    'BETA_COST_OTHER', choice_betas['BETA_COST_OTHER'], None, None, 0
)
BETA_DIST = Beta('BETA_DIST', choice_betas['BETA_DIST'], None, None, 0)
BETA_TIME_CAR_REF = Beta(
    'BETA_TIME_CAR_REF', choice_betas['BETA_TIME_CAR_REF'], None, 0, 0
)
BETA_TIME_CAR_AL = Beta(
    'BETA_TIME_CAR_AL', choice_betas['BETA_TIME_CAR_AL'], None, None, 0
)
BETA_TIME_PT_REF = Beta(
    'BETA_TIME_PT_REF', choice_betas['BETA_TIME_PT_REF'], None, 0, 0
)
BETA_TIME_CAR = BETA_TIME_CAR_REF * exp(BETA_TIME_CAR_AL * ACTIVELIFE)
BETA_TIME_PT_AL = Beta(
    'BETA_TIME_PT_AL', choice_betas['BETA_TIME_PT_AL'], None, None, 0
)
BETA_TIME_PT = BETA_TIME_PT_REF * exp(BETA_TIME_PT_AL * ACTIVELIFE)
BETA_WAITING_TIME = Beta(
    'BETA_WAITING_TIME', choice_betas['BETA_WAITING_TIME'], None, None, 0
)

Definition of utility functions:

V0 = (
    ASC_PT
    + BETA_TIME_PT * TimePT_scaled
    + BETA_WAITING_TIME * WaitingTimePT
    + BETA_COST_HWH * MarginalCostPT_scaled * PurpHWH
    + BETA_COST_OTHER * MarginalCostPT_scaled * PurpOther
)

V1 = (
    ASC_CAR
    + BETA_TIME_CAR * TimeCar_scaled
    + BETA_COST_HWH * CostCarCHF_scaled * PurpHWH
    + BETA_COST_OTHER * CostCarCHF_scaled * PurpOther
)

V2 = ASC_SM + BETA_DIST * distance_km_scaled

Associate utility functions with the numbering of alternatives

V = {0: V0, 1: V1, 2: V2}

Conditional on omega, we have a logit model (called the kernel) for the choice

condprob = models.logit(V, None, Choice)

Conditional on omega, we have the product of ordered probit for the indicators.

condlike = prob_indicators * condprob

We integrate over omega using numerical integration

loglike = log(Integrate(condlike * density, 'omega'))

Read the data

database = read_data()

Create the Biogeme object

the_biogeme = bio.BIOGEME(database, loglike)
the_biogeme.modelName = 'm03_simultaneous_estimation'

Biogeme parameters read from biogeme.toml.

If estimation results are saved on file, we read them to speed up the process. If not, we estimate the parameters.

results = read_or_estimate(the_biogeme=the_biogeme, directory='saved_results')

print(results.short_summary())

Results for model m03_simultaneous_estimation
Nbr of parameters:              37
Sample size:                    1906
Excluded data:                  0
Final log likelihood:           -15846.32
Akaike Information Criterion:   31766.65
Bayesian Information Criterion: 31972.1

print(f'Final log likelihood: {results.data.logLike:.3f}')
print(f'Output file: {results.data.htmlFileName}')

Final log likelihood: -15846.325
Output file: m03_simultaneous_estimation.html

results.get_estimated_parameters()

	Value	Active bound	Rob. Std err	Rob. t-test	Rob. p-value
ASC_CAR	1.078918	0.0	0.078590	13.728360	0.000000e+00
ASC_SM	0.495610	0.0	0.138355	3.582161	3.407635e-04
BETA_COST_HWH	-1.286691	0.0	0.110560	-11.637892	0.000000e+00
BETA_COST_OTHER	-0.688205	0.0	0.049779	-13.825129	0.000000e+00
BETA_DIST	-1.102123	0.0	0.039651	-27.795263	0.000000e+00
BETA_TIME_CAR_AL	0.065707	0.0	0.011882	5.529763	3.206630e-08
BETA_TIME_CAR_REF	-5.640831	0.0	0.301038	-18.737921	0.000000e+00
BETA_TIME_PT_AL	0.954420	0.0	1.336968	0.713869	4.753079e-01
BETA_TIME_PT_REF	0.000000	1.0	0.000150	0.000000	1.000000e+00
BETA_WAITING_TIME	-0.048823	0.0	0.004250	-11.487878	0.000000e+00
coef_age_30_less	1.703561	0.0	0.290199	5.870325	4.349426e-09
coef_artisans	0.055635	0.0	0.407021	0.136688	8.912775e-01
coef_child_center	0.087127	0.0	0.283146	0.307711	7.583020e-01
coef_child_suburb	-0.237714	0.0	0.209143	-1.136611	2.557010e-01
coef_employees	-0.257955	0.0	0.194097	-1.329006	1.838461e-01
coef_haveChildren	0.106247	0.0	0.183755	0.578199	5.631298e-01
coef_highEducation	-0.446046	0.0	0.215071	-2.073953	3.808368e-02
coef_intercept	-1.717403	0.0	0.237067	-7.244378	4.343192e-13
coef_male	0.341038	0.0	0.184398	1.849470	6.438996e-02
coeff_LifSty07	0.240909	0.0	0.027995	8.605329	0.000000e+00
coeff_LifSty10	0.215288	0.0	0.022921	9.392652	0.000000e+00
coeff_ResidCh04	0.227442	0.0	0.021304	10.675932	0.000000e+00
coeff_ResidCh05	0.606470	0.0	0.035911	16.887987	0.000000e+00
coeff_ResidCh06	0.393956	0.0	0.033154	11.882593	0.000000e+00
delta_1	1.742328	0.0	0.105509	16.513577	0.000000e+00
delta_2	3.580393	0.0	0.212203	16.872496	0.000000e+00
inter_LifSty07	-2.760738	0.0	0.197785	-13.958285	0.000000e+00
inter_LifSty10	-0.662712	0.0	0.104170	-6.361860	1.993248e-10
inter_ResidCh04	0.650264	0.0	0.098010	6.634697	3.251710e-11
inter_ResidCh05	-4.989316	0.0	0.318020	-15.688694	0.000000e+00
inter_ResidCh06	-1.350119	0.0	0.155015	-8.709580	0.000000e+00
sigma_s	3.531522	0.0	0.238665	14.796995	0.000000e+00
sigma_star_LifSty07	3.792475	0.0	0.240744	15.753126	0.000000e+00
sigma_star_LifSty10	3.320221	0.0	0.208423	15.930223	0.000000e+00
sigma_star_ResidCh04	3.161298	0.0	0.197915	15.972993	0.000000e+00
sigma_star_ResidCh05	4.111465	0.0	0.261640	15.714229	0.000000e+00
sigma_star_ResidCh06	4.516876	0.0	0.287576	15.706714	0.000000e+00

Total running time of the script: (0 minutes 0.358 seconds)

Gallery generated by Sphinx-Gallery