Note
Go to the end to download the full example code.
Investigation of the estimation problem
This file is an updated version of 07problem.py, where the probabilities are simulated in order to investigate the numerical issue.
- author:
Michel Bierlaire, EPFL
- date:
Thu Apr 13 18:21:15 2023
from IPython.core.display_functions import display
import biogeme.biogeme as bio
from biogeme.models import piecewise_formula
import biogeme.biogeme_logging as blog
from biogeme.expressions import Beta, Elem, bioNormalCdf
from biogeme.data.optima import (
read_data,
age_65_more,
ScaledIncome,
moreThanOneCar,
moreThanOneBike,
individualHouse,
male,
haveChildren,
haveGA,
highEducation,
Envir01,
Envir02,
Envir03,
Mobil11,
Mobil14,
Mobil16,
Mobil17,
)
logger = blog.get_screen_logger(level=blog.INFO)
logger.info('Example b07problem_simul.py')
Example b07problem_simul.py
Parameters to be estimated
coef_intercept = Beta('coef_intercept', 0.0, None, None, 0)
coef_age_65_more = Beta('coef_age_65_more', 0.0, None, None, 0)
coef_haveGA = Beta('coef_haveGA', 0.0, None, None, 0)
coef_moreThanOneCar = Beta('coef_moreThanOneCar', 0.0, None, None, 0)
coef_moreThanOneBike = Beta('coef_moreThanOneBike', 0.0, None, None, 0)
coef_individualHouse = Beta('coef_individualHouse', 0.0, None, None, 0)
coef_male = Beta('coef_male', 0.0, None, None, 0)
coef_haveChildren = Beta('coef_haveChildren', 0.0, None, None, 0)
coef_highEducation = Beta('coef_highEducation', 0.0, None, None, 0)
thresholds = [None, 4, 6, 8, 10, None]
formula_income = piecewise_formula(variable=ScaledIncome, thresholds=thresholds)
Latent variable: structural equation.
CARLOVERS = (
coef_intercept
+ coef_age_65_more * age_65_more
+ formula_income
+ coef_moreThanOneCar * moreThanOneCar
+ coef_moreThanOneBike * moreThanOneBike
+ coef_individualHouse * individualHouse
+ coef_male * male
+ coef_haveChildren * haveChildren
+ coef_haveGA * haveGA
+ coef_highEducation * highEducation
)
Measurement equations
Intercepts.
INTER_Envir01 = Beta('INTER_Envir01', 0, None, None, 1)
INTER_Envir02 = Beta('INTER_Envir02', 0, None, None, 0)
INTER_Envir03 = Beta('INTER_Envir03', 0, None, None, 0)
INTER_Mobil11 = Beta('INTER_Mobil11', 0, None, None, 0)
INTER_Mobil14 = Beta('INTER_Mobil14', 0, None, None, 0)
INTER_Mobil16 = Beta('INTER_Mobil16', 0, None, None, 0)
INTER_Mobil17 = Beta('INTER_Mobil17', 0, None, None, 0)
Coefficients.
B_Envir01_F1 = Beta('B_Envir01_F1', -1, None, None, 1)
B_Envir02_F1 = Beta('B_Envir02_F1', -1, None, None, 0)
B_Envir03_F1 = Beta('B_Envir03_F1', 1, None, None, 0)
B_Mobil11_F1 = Beta('B_Mobil11_F1', 1, None, None, 0)
B_Mobil14_F1 = Beta('B_Mobil14_F1', 1, None, None, 0)
B_Mobil16_F1 = Beta('B_Mobil16_F1', 1, None, None, 0)
B_Mobil17_F1 = Beta('B_Mobil17_F1', 1, None, None, 0)
Linear models.
MODEL_Envir01 = INTER_Envir01 + B_Envir01_F1 * CARLOVERS
MODEL_Envir02 = INTER_Envir02 + B_Envir02_F1 * CARLOVERS
MODEL_Envir03 = INTER_Envir03 + B_Envir03_F1 * CARLOVERS
MODEL_Mobil11 = INTER_Mobil11 + B_Mobil11_F1 * CARLOVERS
MODEL_Mobil14 = INTER_Mobil14 + B_Mobil14_F1 * CARLOVERS
MODEL_Mobil16 = INTER_Mobil16 + B_Mobil16_F1 * CARLOVERS
MODEL_Mobil17 = INTER_Mobil17 + B_Mobil17_F1 * CARLOVERS
Scale parameters.
SIGMA_STAR_Envir01 = Beta('SIGMA_STAR_Envir01', 1, 1.0e-5, None, 1)
SIGMA_STAR_Envir02 = Beta('SIGMA_STAR_Envir02', 0.01, 1.0e-5, None, 0)
SIGMA_STAR_Envir03 = Beta('SIGMA_STAR_Envir03', 1, 1.0e-5, None, 0)
SIGMA_STAR_Mobil11 = Beta('SIGMA_STAR_Mobil11', 1, 1.0e-5, None, 0)
SIGMA_STAR_Mobil14 = Beta('SIGMA_STAR_Mobil14', 1, 1.0e-5, None, 0)
SIGMA_STAR_Mobil16 = Beta('SIGMA_STAR_Mobil16', 1, 1.0e-5, None, 0)
SIGMA_STAR_Mobil17 = Beta('SIGMA_STAR_Mobil17', 1, 1.0e-5, None, 0)
Symmetric thresholds.
delta_1 = Beta('delta_1', 0.1, 1.0e-5, None, 0)
delta_2 = Beta('delta_2', 0.2, 1.0e-5, None, 0)
tau_1 = -delta_1 - delta_2
tau_2 = -delta_1
tau_3 = delta_1
tau_4 = delta_1 + delta_2
Ordered probit models.
Envir01_tau_1 = (tau_1 - MODEL_Envir01) / SIGMA_STAR_Envir01
Envir01_tau_2 = (tau_2 - MODEL_Envir01) / SIGMA_STAR_Envir01
Envir01_tau_3 = (tau_3 - MODEL_Envir01) / SIGMA_STAR_Envir01
Envir01_tau_4 = (tau_4 - MODEL_Envir01) / SIGMA_STAR_Envir01
IndEnvir01 = {
1: bioNormalCdf(Envir01_tau_1),
2: bioNormalCdf(Envir01_tau_2) - bioNormalCdf(Envir01_tau_1),
3: bioNormalCdf(Envir01_tau_3) - bioNormalCdf(Envir01_tau_2),
4: bioNormalCdf(Envir01_tau_4) - bioNormalCdf(Envir01_tau_3),
5: 1 - bioNormalCdf(Envir01_tau_4),
6: 1.0,
-1: 1.0,
-2: 1.0,
}
P_Envir01 = Elem(IndEnvir01, Envir01)
Envir02_tau_1 = (tau_1 - MODEL_Envir02) / SIGMA_STAR_Envir02
Envir02_tau_2 = (tau_2 - MODEL_Envir02) / SIGMA_STAR_Envir02
Envir02_tau_3 = (tau_3 - MODEL_Envir02) / SIGMA_STAR_Envir02
Envir02_tau_4 = (tau_4 - MODEL_Envir02) / SIGMA_STAR_Envir02
IndEnvir02 = {
1: bioNormalCdf(Envir02_tau_1),
2: bioNormalCdf(Envir02_tau_2) - bioNormalCdf(Envir02_tau_1),
3: bioNormalCdf(Envir02_tau_3) - bioNormalCdf(Envir02_tau_2),
4: bioNormalCdf(Envir02_tau_4) - bioNormalCdf(Envir02_tau_3),
5: 1 - bioNormalCdf(Envir02_tau_4),
6: 1.0,
-1: 1.0,
-2: 1.0,
}
P_Envir02 = Elem(IndEnvir02, Envir02)
Envir03_tau_1 = (tau_1 - MODEL_Envir03) / SIGMA_STAR_Envir03
Envir03_tau_2 = (tau_2 - MODEL_Envir03) / SIGMA_STAR_Envir03
Envir03_tau_3 = (tau_3 - MODEL_Envir03) / SIGMA_STAR_Envir03
Envir03_tau_4 = (tau_4 - MODEL_Envir03) / SIGMA_STAR_Envir03
IndEnvir03 = {
1: bioNormalCdf(Envir03_tau_1),
2: bioNormalCdf(Envir03_tau_2) - bioNormalCdf(Envir03_tau_1),
3: bioNormalCdf(Envir03_tau_3) - bioNormalCdf(Envir03_tau_2),
4: bioNormalCdf(Envir03_tau_4) - bioNormalCdf(Envir03_tau_3),
5: 1 - bioNormalCdf(Envir03_tau_4),
6: 1.0,
-1: 1.0,
-2: 1.0,
}
P_Envir03 = Elem(IndEnvir03, Envir03)
Mobil11_tau_1 = (tau_1 - MODEL_Mobil11) / SIGMA_STAR_Mobil11
Mobil11_tau_2 = (tau_2 - MODEL_Mobil11) / SIGMA_STAR_Mobil11
Mobil11_tau_3 = (tau_3 - MODEL_Mobil11) / SIGMA_STAR_Mobil11
Mobil11_tau_4 = (tau_4 - MODEL_Mobil11) / SIGMA_STAR_Mobil11
IndMobil11 = {
1: bioNormalCdf(Mobil11_tau_1),
2: bioNormalCdf(Mobil11_tau_2) - bioNormalCdf(Mobil11_tau_1),
3: bioNormalCdf(Mobil11_tau_3) - bioNormalCdf(Mobil11_tau_2),
4: bioNormalCdf(Mobil11_tau_4) - bioNormalCdf(Mobil11_tau_3),
5: 1 - bioNormalCdf(Mobil11_tau_4),
6: 1.0,
-1: 1.0,
-2: 1.0,
}
P_Mobil11 = Elem(IndMobil11, Mobil11)
Mobil14_tau_1 = (tau_1 - MODEL_Mobil14) / SIGMA_STAR_Mobil14
Mobil14_tau_2 = (tau_2 - MODEL_Mobil14) / SIGMA_STAR_Mobil14
Mobil14_tau_3 = (tau_3 - MODEL_Mobil14) / SIGMA_STAR_Mobil14
Mobil14_tau_4 = (tau_4 - MODEL_Mobil14) / SIGMA_STAR_Mobil14
IndMobil14 = {
1: bioNormalCdf(Mobil14_tau_1),
2: bioNormalCdf(Mobil14_tau_2) - bioNormalCdf(Mobil14_tau_1),
3: bioNormalCdf(Mobil14_tau_3) - bioNormalCdf(Mobil14_tau_2),
4: bioNormalCdf(Mobil14_tau_4) - bioNormalCdf(Mobil14_tau_3),
5: 1 - bioNormalCdf(Mobil14_tau_4),
6: 1.0,
-1: 1.0,
-2: 1.0,
}
P_Mobil14 = Elem(IndMobil14, Mobil14)
Mobil16_tau_1 = (tau_1 - MODEL_Mobil16) / SIGMA_STAR_Mobil16
Mobil16_tau_2 = (tau_2 - MODEL_Mobil16) / SIGMA_STAR_Mobil16
Mobil16_tau_3 = (tau_3 - MODEL_Mobil16) / SIGMA_STAR_Mobil16
Mobil16_tau_4 = (tau_4 - MODEL_Mobil16) / SIGMA_STAR_Mobil16
IndMobil16 = {
1: bioNormalCdf(Mobil16_tau_1),
2: bioNormalCdf(Mobil16_tau_2) - bioNormalCdf(Mobil16_tau_1),
3: bioNormalCdf(Mobil16_tau_3) - bioNormalCdf(Mobil16_tau_2),
4: bioNormalCdf(Mobil16_tau_4) - bioNormalCdf(Mobil16_tau_3),
5: 1 - bioNormalCdf(Mobil16_tau_4),
6: 1.0,
-1: 1.0,
-2: 1.0,
}
P_Mobil16 = Elem(IndMobil16, Mobil16)
Mobil17_tau_1 = (tau_1 - MODEL_Mobil17) / SIGMA_STAR_Mobil17
Mobil17_tau_2 = (tau_2 - MODEL_Mobil17) / SIGMA_STAR_Mobil17
Mobil17_tau_3 = (tau_3 - MODEL_Mobil17) / SIGMA_STAR_Mobil17
Mobil17_tau_4 = (tau_4 - MODEL_Mobil17) / SIGMA_STAR_Mobil17
IndMobil17 = {
1: bioNormalCdf(Mobil17_tau_1),
2: bioNormalCdf(Mobil17_tau_2) - bioNormalCdf(Mobil17_tau_1),
3: bioNormalCdf(Mobil17_tau_3) - bioNormalCdf(Mobil17_tau_2),
4: bioNormalCdf(Mobil17_tau_4) - bioNormalCdf(Mobil17_tau_3),
5: 1 - bioNormalCdf(Mobil17_tau_4),
6: 1.0,
-1: 1.0,
-2: 1.0,
}
P_Mobil17 = Elem(IndMobil17, Mobil17)
Expressions ot simulate.
simulate = {
'P_Envir01': P_Envir01,
'P_Envir02': P_Envir02,
'P_Envir03': P_Envir03,
'P_Mobil11': P_Mobil11,
'P_Mobil14': P_Mobil14,
'P_Mobil16': P_Mobil16,
'P_Mobil17': P_Mobil17,
}
Extract the list of parameters in the simulated expressions.
beta_values = {}
for expr in simulate.values():
beta_values = beta_values | expr.get_beta_values()
Read the data
database = read_data()
Create the Biogeme object
biosim = bio.BIOGEME(database, simulate)
biosim.modelName = '07problem_simul'
Biogeme parameters read from biogeme.toml.
simulated_values = biosim.simulate(the_beta_values=beta_values)
display(simulated_values)
P_Envir01 P_Envir02 P_Envir03 ... P_Mobil14 P_Mobil16 P_Mobil17
0 0.079656 0.000000e+00 0.382089 ... 0.382089 0.382089 0.382089
2 1.000000 1.000000e+00 1.000000 ... 1.000000 1.000000 1.000000
3 0.079656 7.619853e-24 0.079656 ... 0.382089 0.079656 0.078084
4 0.382089 1.000000e+00 0.079656 ... 0.382089 0.382089 0.078084
5 0.078084 0.000000e+00 0.079656 ... 0.078084 0.382089 0.079656
... ... ... ... ... ... ... ...
2259 0.079656 0.000000e+00 0.078084 ... 0.382089 0.079656 0.079656
2261 0.078084 1.000000e+00 0.078084 ... 1.000000 0.078084 0.078084
2262 0.078084 1.000000e+00 0.078084 ... 1.000000 0.078084 0.078084
2263 0.382089 4.906714e-198 0.382089 ... 0.382089 0.382089 1.000000
2264 0.382089 4.906714e-198 0.382089 ... 0.382089 0.382089 1.000000
[1906 rows x 7 columns]
We identify the entries for which the likelihood is zero, so that the log likelihood cannot be computed
zero_values = simulated_values.where(simulated_values == 0, other='')
display(zero_values)
P_Envir01 P_Envir02 P_Envir03 P_Mobil11 P_Mobil14 P_Mobil16 P_Mobil17
0 0.0
2
3
4
5 0.0
... ... ... ... ... ... ... ...
2259 0.0
2261
2262
2263
2264
[1906 rows x 7 columns]
Diagnostic.
print('The problematic model is Envir02')
The problematic model is Envir02
Total running time of the script: (0 minutes 0.276 seconds)