Note
Go to the end to download the full example code.
Example of a catalog
Illustration of the concept of a catalog. See Bierlaire and Ortelli (2023).
- author:
Michel Bierlaire, EPFL
- date:
Sun Aug 6 18:13:18 2023
import numpy as np
from biogeme import models
from biogeme.expressions import Beta, Variable, Expression
from biogeme.models import boxcox
from biogeme.catalog import (
Catalog,
generic_alt_specific_catalogs,
segmentation_catalogs,
)
from biogeme.nests import OneNestForNestedLogit, NestsForNestedLogit
from biogeme.data.swissmetro import (
read_data,
CHOICE,
SM_AV,
CAR_AV_SP,
TRAIN_AV_SP,
TRAIN_TT_SCALED,
TRAIN_COST_SCALED,
SM_TT_SCALED,
SM_COST_SCALED,
CAR_TT_SCALED,
CAR_CO_SCALED,
)
Function printing all configurations of an expression.
def print_all_configurations(expression: Expression) -> None:
    """Print every configuration that an expression can take.

    The expression is first asked to set up its central controller. The
    number of configurations reported by that controller is printed on a
    first line, followed by one configuration identifier per line.

    :param expression: expression whose configurations are listed. It must
        provide ``set_central_controller()`` and, once that has been
        called, a ``central_controller`` attribute.
    """
    # The controller is only read after set_central_controller() has run.
    expression.set_central_controller()
    controller = expression.central_controller
    total = controller.number_of_configurations()
    print(f'Total: {total} configurations')
    # Identifiers are printed in whatever order the controller supplies them.
    for config_id in controller.all_configurations_ids:
        print(config_id)
Parameters to be estimated.
# The four parameters are created with identical arguments after the name:
# 0, None, None, 0.
# NOTE(review): the positional order is presumably (name, initial value,
# lower bound, upper bound, status) -- confirm against the Beta documentation.
# No constant is declared for alternative 2: its utility V2 has no ASC term.
ASC_CAR = Beta('ASC_CAR', 0, None, None, 0)
ASC_TRAIN = Beta('ASC_TRAIN', 0, None, None, 0)
B_TIME = Beta('B_TIME', 0, None, None, 0)
B_COST = Beta('B_COST', 0, None, None, 0)
Definition of the utility functions.
# One utility per alternative. B_TIME and B_COST appear in all three
# expressions, so the same coefficient objects are shared across alternatives.
# Only V1 and V3 carry an alternative specific constant; V2 has none.
V1 = ASC_TRAIN + B_TIME * TRAIN_TT_SCALED + B_COST * TRAIN_COST_SCALED
V2 = B_TIME * SM_TT_SCALED + B_COST * SM_COST_SCALED
V3 = ASC_CAR + B_TIME * CAR_TT_SCALED + B_COST * CAR_CO_SCALED
Associate utility functions with the numbering of alternatives.
V = {1: V1, 2: V2, 3: V3}
Associate the availability conditions with the alternatives.
av = {1: TRAIN_AV_SP, 2: SM_AV, 3: CAR_AV_SP}
Definition of the model. This is the contribution of each observation to the log-likelihood function.
logprob_logit = models.loglogit(V, av, CHOICE)
Nest definition.
# A single nest grouping alternatives 1 and 3; alternative 2 is not listed in
# any nest. In the utility and availability dictionaries these ids are used
# for train (1), Swissmetro (2) and car (3).
# NOTE(review): mu_existing presumably starts at 1 with bounds [1, 10] --
# confirm against the Beta argument order.
mu_existing = Beta('mu_existing', 1, 1, 10, 0)
existing = OneNestForNestedLogit(nest_param=mu_existing, list_of_alternatives=[1, 3])
# list(V) yields the keys of the utility dictionary, i.e. the alternative ids.
nests = NestsForNestedLogit(choice_set=list(V), tuple_of_nests=(existing,))
Contribution to the log-likelihood.
logprob_nested = models.lognested(V, av, nests, CHOICE)
Definition of the catalog containing two model specifications: logit and nested logit.
# Wrap the two log-likelihood expressions in one catalog, keyed by the names
# used to select them. The first printout of the catalog shows 'logit' as the
# entry selected right after construction.
model_catalog = Catalog.from_dict(
catalog_name='model_catalog',
dict_of_expressions={
'logit': logprob_logit,
'nested': logprob_nested,
},
)
Current status of the catalog.
print(model_catalog)
[model_catalog: logit]_bioLogLogit[choice=CHOICE]U=(1:((Beta('ASC_TRAIN', 0, None, None, 0) + (Beta('B_TIME', 0, None, None, 0) * TRAIN_TT_SCALED)) + (Beta('B_COST', 0, None, None, 0) * TRAIN_COST_SCALED)), 2:((Beta('B_TIME', 0, None, None, 0) * SM_TT_SCALED) + (Beta('B_COST', 0, None, None, 0) * SM_COST_SCALED)), 3:((Beta('ASC_CAR', 0, None, None, 0) + (Beta('B_TIME', 0, None, None, 0) * CAR_TT_SCALED)) + (Beta('B_COST', 0, None, None, 0) * CAR_CO_SCALED)))av=(1:TRAIN_AV_SP, 2:SM_AV, 3:CAR_AV_SP)
Use the controller to select a different configuration.
# Select the 'nested' entry by name through the catalog's controller; the
# catalog printed afterwards is tagged "[model_catalog: nested]".
model_catalog.controlled_by.set_name('nested')
print(model_catalog)
[model_catalog: nested]_bioLogLogit[choice=CHOICE]U=(1:(((Beta('ASC_TRAIN', 0, None, None, 0) + (Beta('B_TIME', 0, None, None, 0) * TRAIN_TT_SCALED)) + (Beta('B_COST', 0, None, None, 0) * TRAIN_COST_SCALED)) + (((Beta('mu_existing', 1, 1, 10, 0) - `1.0`) * ((Beta('ASC_TRAIN', 0, None, None, 0) + (Beta('B_TIME', 0, None, None, 0) * TRAIN_TT_SCALED)) + (Beta('B_COST', 0, None, None, 0) * TRAIN_COST_SCALED))) + (((`1.0` / Beta('mu_existing', 1, 1, 10, 0)) - `1.0`) * log(ConditionalSum((TRAIN_AV_SP != `0.0`): exp((Beta('mu_existing', 1, 1, 10, 0) * ((Beta('ASC_TRAIN', 0, None, None, 0) + (Beta('B_TIME', 0, None, None, 0) * TRAIN_TT_SCALED)) + (Beta('B_COST', 0, None, None, 0) * TRAIN_COST_SCALED)))), (CAR_AV_SP != `0.0`): exp((Beta('mu_existing', 1, 1, 10, 0) * ((Beta('ASC_CAR', 0, None, None, 0) + (Beta('B_TIME', 0, None, None, 0) * CAR_TT_SCALED)) + (Beta('B_COST', 0, None, None, 0) * CAR_CO_SCALED))))))))), 2:(((Beta('B_TIME', 0, None, None, 0) * SM_TT_SCALED) + (Beta('B_COST', 0, None, None, 0) * SM_COST_SCALED)) + `0.0`), 3:(((Beta('ASC_CAR', 0, None, None, 0) + (Beta('B_TIME', 0, None, None, 0) * CAR_TT_SCALED)) + (Beta('B_COST', 0, None, None, 0) * CAR_CO_SCALED)) + (((Beta('mu_existing', 1, 1, 10, 0) - `1.0`) * ((Beta('ASC_CAR', 0, None, None, 0) + (Beta('B_TIME', 0, None, None, 0) * CAR_TT_SCALED)) + (Beta('B_COST', 0, None, None, 0) * CAR_CO_SCALED))) + (((`1.0` / Beta('mu_existing', 1, 1, 10, 0)) - `1.0`) * log(ConditionalSum((TRAIN_AV_SP != `0.0`): exp((Beta('mu_existing', 1, 1, 10, 0) * ((Beta('ASC_TRAIN', 0, None, None, 0) + (Beta('B_TIME', 0, None, None, 0) * TRAIN_TT_SCALED)) + (Beta('B_COST', 0, None, None, 0) * TRAIN_COST_SCALED)))), (CAR_AV_SP != `0.0`): exp((Beta('mu_existing', 1, 1, 10, 0) * ((Beta('ASC_CAR', 0, None, None, 0) + (Beta('B_TIME', 0, None, None, 0) * CAR_TT_SCALED)) + (Beta('B_COST', 0, None, None, 0) * CAR_CO_SCALED))))))))))av=(1:TRAIN_AV_SP, 2:SM_AV, 3:CAR_AV_SP)
Iterator.
# Iterating over the catalog prints it once per specification: in the recorded
# output, first with 'logit' selected and then with 'nested' selected.
for specification in model_catalog:
    print(specification)
[model_catalog: logit]_bioLogLogit[choice=CHOICE]U=(1:((Beta('ASC_TRAIN', 0, None, None, 0) + (Beta('B_TIME', 0, None, None, 0) * TRAIN_TT_SCALED)) + (Beta('B_COST', 0, None, None, 0) * TRAIN_COST_SCALED)), 2:((Beta('B_TIME', 0, None, None, 0) * SM_TT_SCALED) + (Beta('B_COST', 0, None, None, 0) * SM_COST_SCALED)), 3:((Beta('ASC_CAR', 0, None, None, 0) + (Beta('B_TIME', 0, None, None, 0) * CAR_TT_SCALED)) + (Beta('B_COST', 0, None, None, 0) * CAR_CO_SCALED)))av=(1:TRAIN_AV_SP, 2:SM_AV, 3:CAR_AV_SP)
[model_catalog: nested]_bioLogLogit[choice=CHOICE]U=(1:(((Beta('ASC_TRAIN', 0, None, None, 0) + (Beta('B_TIME', 0, None, None, 0) * TRAIN_TT_SCALED)) + (Beta('B_COST', 0, None, None, 0) * TRAIN_COST_SCALED)) + (((Beta('mu_existing', 1, 1, 10, 0) - `1.0`) * ((Beta('ASC_TRAIN', 0, None, None, 0) + (Beta('B_TIME', 0, None, None, 0) * TRAIN_TT_SCALED)) + (Beta('B_COST', 0, None, None, 0) * TRAIN_COST_SCALED))) + (((`1.0` / Beta('mu_existing', 1, 1, 10, 0)) - `1.0`) * log(ConditionalSum((TRAIN_AV_SP != `0.0`): exp((Beta('mu_existing', 1, 1, 10, 0) * ((Beta('ASC_TRAIN', 0, None, None, 0) + (Beta('B_TIME', 0, None, None, 0) * TRAIN_TT_SCALED)) + (Beta('B_COST', 0, None, None, 0) * TRAIN_COST_SCALED)))), (CAR_AV_SP != `0.0`): exp((Beta('mu_existing', 1, 1, 10, 0) * ((Beta('ASC_CAR', 0, None, None, 0) + (Beta('B_TIME', 0, None, None, 0) * CAR_TT_SCALED)) + (Beta('B_COST', 0, None, None, 0) * CAR_CO_SCALED))))))))), 2:(((Beta('B_TIME', 0, None, None, 0) * SM_TT_SCALED) + (Beta('B_COST', 0, None, None, 0) * SM_COST_SCALED)) + `0.0`), 3:(((Beta('ASC_CAR', 0, None, None, 0) + (Beta('B_TIME', 0, None, None, 0) * CAR_TT_SCALED)) + (Beta('B_COST', 0, None, None, 0) * CAR_CO_SCALED)) + (((Beta('mu_existing', 1, 1, 10, 0) - `1.0`) * ((Beta('ASC_CAR', 0, None, None, 0) + (Beta('B_TIME', 0, None, None, 0) * CAR_TT_SCALED)) + (Beta('B_COST', 0, None, None, 0) * CAR_CO_SCALED))) + (((`1.0` / Beta('mu_existing', 1, 1, 10, 0)) - `1.0`) * log(ConditionalSum((TRAIN_AV_SP != `0.0`): exp((Beta('mu_existing', 1, 1, 10, 0) * ((Beta('ASC_TRAIN', 0, None, None, 0) + (Beta('B_TIME', 0, None, None, 0) * TRAIN_TT_SCALED)) + (Beta('B_COST', 0, None, None, 0) * TRAIN_COST_SCALED)))), (CAR_AV_SP != `0.0`): exp((Beta('mu_existing', 1, 1, 10, 0) * ((Beta('ASC_CAR', 0, None, None, 0) + (Beta('B_TIME', 0, None, None, 0) * CAR_TT_SCALED)) + (Beta('B_COST', 0, None, None, 0) * CAR_CO_SCALED))))))))))av=(1:TRAIN_AV_SP, 2:SM_AV, 3:CAR_AV_SP)
All configurations.
print_all_configurations(model_catalog)
Total: 2 configurations
model_catalog:logit
model_catalog:nested
# Three alternative transformations of the train travel time, gathered in one
# catalog under the names 'linear', 'boxcox' and 'squared'.
TRAIN_TT = Variable('TRAIN_TT')
# TRAIN_COST is not part of this catalog; it is used later when V_TRAIN is
# built from the coefficient catalogs.
TRAIN_COST = Variable('TRAIN_COST')
# Parameter passed to boxcox(); the same object is reused for the car travel
# time further down.
ell_travel_time = Beta('lambda_travel_time', 1, -10, 10, 0)
linear_train_tt = TRAIN_TT
boxcox_train_tt = boxcox(TRAIN_TT, ell_travel_time)
squared_train_tt = TRAIN_TT * TRAIN_TT
train_tt_catalog = Catalog.from_dict(
catalog_name='train_tt_catalog',
dict_of_expressions={
'linear': linear_train_tt,
'boxcox': boxcox_train_tt,
'squared': squared_train_tt,
},
)
Define a utility function involving the catalog.
# NOTE(review): these two lines rebind ASC_TRAIN and B_TIME. ASC_TRAIN gets the
# same arguments as its earlier definition, but B_TIME now receives 0 as its
# fourth argument where the earlier definition had None (presumably an upper
# bound of 0 -- confirm that the difference is intended).
ASC_TRAIN = Beta('ASC_TRAIN', 0, None, None, 0)
B_TIME = Beta('B_TIME', 0, None, 0, 0)
# The utility contains the catalog, so it has the catalog's three
# configurations, as the listing printed by the next call shows.
V_TRAIN = ASC_TRAIN + B_TIME * train_tt_catalog
print_all_configurations(V_TRAIN)
Total: 3 configurations
train_tt_catalog:squared
train_tt_catalog:linear
train_tt_catalog:boxcox
Unsynchronized catalogs
# Same three transformations, now for the car travel time. No controlled_by
# argument is given here, and combining this catalog with train_tt_catalog
# produces 3 x 3 = 9 configurations: the two catalogs are selected
# independently.
CAR_TT = Variable('CAR_TT')
# CAR_COST is not part of this catalog; it is used later in V_CAR.
CAR_COST = Variable('CAR_COST')
linear_car_tt = CAR_TT
# Reuses ell_travel_time, the parameter already used for the train Box-Cox.
boxcox_car_tt = boxcox(CAR_TT, ell_travel_time)
squared_car_tt = CAR_TT * CAR_TT
car_tt_catalog = Catalog.from_dict(
catalog_name='car_tt_catalog',
dict_of_expressions={
'linear': linear_car_tt,
'boxcox': boxcox_car_tt,
'squared': squared_car_tt,
},
)
Create a dummy expression with the two catalogs.
dummy_expression = train_tt_catalog + car_tt_catalog
print_all_configurations(dummy_expression)
Total: 9 configurations
car_tt_catalog:linear;train_tt_catalog:linear
car_tt_catalog:squared;train_tt_catalog:linear
car_tt_catalog:linear;train_tt_catalog:squared
car_tt_catalog:linear;train_tt_catalog:boxcox
car_tt_catalog:boxcox;train_tt_catalog:boxcox
car_tt_catalog:boxcox;train_tt_catalog:linear
car_tt_catalog:boxcox;train_tt_catalog:squared
car_tt_catalog:squared;train_tt_catalog:boxcox
car_tt_catalog:squared;train_tt_catalog:squared
Synchronized catalogs.
# Same definitions as the unsynchronized variant; the only difference is the
# controlled_by argument, which passes the controller of train_tt_catalog.
# Combining the two catalogs then yields only 3 configurations, all reported
# under the name train_tt_catalog.
CAR_TT = Variable('CAR_TT')
CAR_COST = Variable('CAR_COST')
linear_car_tt = CAR_TT
boxcox_car_tt = boxcox(CAR_TT, ell_travel_time)
squared_car_tt = CAR_TT * CAR_TT
car_tt_catalog = Catalog.from_dict(
catalog_name='car_tt_catalog',
dict_of_expressions={
'linear': linear_car_tt,
'boxcox': boxcox_car_tt,
'squared': squared_car_tt,
},
# Reuse the train catalog's controller instead of creating a new one.
controlled_by=train_tt_catalog.controlled_by,
)
Create a dummy expression with the two catalogs.
dummy_expression = train_tt_catalog + car_tt_catalog
print_all_configurations(dummy_expression)
Total: 3 configurations
train_tt_catalog:squared
train_tt_catalog:linear
train_tt_catalog:boxcox
Alternative specific specification.
# One call with both coefficients. The result is unpacked into one object per
# coefficient, each indexed by alternative name ('TRAIN', 'CAR') when the
# utilities are built. The configurations listed afterwards mention a single
# controller, "coefficients_gen_altspec": B_TIME and B_COST are either both
# generic or both alternative specific (2 configurations).
(B_TIME_catalog_dict, B_COST_catalog_dict) = generic_alt_specific_catalogs(
generic_name='coefficients',
beta_parameters=[B_TIME, B_COST],
alternatives=('TRAIN', 'CAR'),
)
Create utility functions involving those catalogs.
V_TRAIN = (
B_TIME_catalog_dict['TRAIN'] * TRAIN_TT + B_COST_catalog_dict['TRAIN'] * TRAIN_COST
)
V_CAR = B_TIME_catalog_dict['CAR'] * CAR_TT + B_COST_catalog_dict['CAR'] * CAR_COST
Create a dummy expression involving the utility functions.
dummy_expression = V_TRAIN + V_CAR
print_all_configurations(dummy_expression)
Total: 2 configurations
coefficients_gen_altspec:generic
coefficients_gen_altspec:altspec
Alternative specific - not synchronized.
# Two separate calls, one per coefficient, each with its own generic_name.
# The configurations listed afterwards combine two controllers,
# "time_coefficient_gen_altspec" and "cost_coefficient_gen_altspec",
# giving 2 x 2 = 4 configurations.
# Each call receives a single coefficient, hence the one-element unpacking.
(B_TIME_catalog_dict,) = generic_alt_specific_catalogs(
generic_name='time_coefficient',
beta_parameters=[B_TIME],
alternatives=('TRAIN', 'CAR'),
)
(B_COST_catalog_dict,) = generic_alt_specific_catalogs(
generic_name='cost_coefficient',
beta_parameters=[B_COST],
alternatives=('TRAIN', 'CAR'),
)
Create utility functions involving those catalogs.
V_TRAIN = (
B_TIME_catalog_dict['TRAIN'] * TRAIN_TT + B_COST_catalog_dict['TRAIN'] * TRAIN_COST
)
V_CAR = B_TIME_catalog_dict['CAR'] * CAR_TT + B_COST_catalog_dict['CAR'] * CAR_COST
Create a dummy expression involving the utility functions.
dummy_expression = V_TRAIN + V_CAR
print_all_configurations(dummy_expression)
Total: 4 configurations
cost_coefficient_gen_altspec:altspec;time_coefficient_gen_altspec:generic
cost_coefficient_gen_altspec:generic;time_coefficient_gen_altspec:altspec
cost_coefficient_gen_altspec:altspec;time_coefficient_gen_altspec:altspec
cost_coefficient_gen_altspec:generic;time_coefficient_gen_altspec:generic
Read the data
database = read_data()
Segmentation
We consider two trip purposes: commuters and anything else. We need to define a binary variable first.
# Add a binary indicator column: 1 where PURPOSE equals 1, 0 otherwise.
database.data['COMMUTERS'] = np.where(database.data['PURPOSE'] == 1, 1, 0)
Segmentation on trip purpose.
# Segmentation on the COMMUTERS indicator column: value 0 is labelled
# 'non_commuters' (also chosen as the reference), value 1 'commuters'.
segmentation_purpose = database.generate_segmentation(
variable='COMMUTERS',
mapping={0: 'non_commuters', 1: 'commuters'},
reference='non_commuters',
)
Segmentation on luggage.
# Segmentation on LUGGAGE with three labelled values; 'no_lugg' is the
# reference. The codes are 0, 1 and 3 -- there is no entry for 2.
# NOTE(review): presumably this mirrors how LUGGAGE is coded in the data set
# -- confirm against the data description.
segmentation_luggage = database.generate_segmentation(
variable='LUGGAGE',
mapping={0: 'no_lugg', 1: 'one_lugg', 3: 'several_lugg'},
reference='no_lugg',
)
Catalog of segmented alternative specific constants, allowing a maximum of two segmentations.
# Both constants are segmented in one call; the configurations listed
# afterwards all belong to a single controller named "ASC".
# With maximum_number=2 there are 4 configurations: no segmentation, each
# segmentation alone, and both combined (COMMUTERS-LUGGAGE).
ASC_TRAIN_catalog, ASC_CAR_catalog = segmentation_catalogs(
generic_name='ASC',
beta_parameters=[ASC_TRAIN, ASC_CAR],
potential_segmentations=(
segmentation_purpose,
segmentation_luggage,
),
maximum_number=2,
)
Create a dummy expression.
dummy_expression = ASC_TRAIN_catalog + ASC_CAR_catalog
print_all_configurations(dummy_expression)
Total: 4 configurations
ASC:no_seg
ASC:COMMUTERS-LUGGAGE
ASC:COMMUTERS
ASC:LUGGAGE
Catalog of segmented alternative specific constants, allowing a maximum of one segmentation.
# Same call as the two-segmentation variant except for maximum_number=1.
# The combined COMMUTERS-LUGGAGE configuration no longer appears, leaving 3
# configurations: no segmentation, COMMUTERS alone, LUGGAGE alone.
ASC_TRAIN_catalog, ASC_CAR_catalog = segmentation_catalogs(
generic_name='ASC',
beta_parameters=[ASC_TRAIN, ASC_CAR],
potential_segmentations=(
segmentation_purpose,
segmentation_luggage,
),
maximum_number=1,
)
Create a dummy expression.
dummy_expression = ASC_TRAIN_catalog + ASC_CAR_catalog
print_all_configurations(dummy_expression)
Total: 3 configurations
ASC:no_seg
ASC:COMMUTERS
ASC:LUGGAGE
Segmentation and alternative specific. Maximum one segmentation.
# Segmentation and the generic / alternative specific choice applied to the
# same coefficient. The listing printed below combines two controllers,
# "B_TIME" (no_seg, COMMUTERS, LUGGAGE) and "B_TIME_gen_altspec"
# (generic, altspec): 3 x 2 = 6 configurations.
(B_TIME_catalog_dict,) = generic_alt_specific_catalogs(
generic_name='B_TIME',
beta_parameters=[B_TIME],
alternatives=['TRAIN', 'CAR'],
potential_segmentations=(
segmentation_purpose,
segmentation_luggage,
),
maximum_number=1,
)
# Only the TRAIN entry is inspected here.
print_all_configurations(B_TIME_catalog_dict['TRAIN'])
Total: 6 configurations
B_TIME:COMMUTERS;B_TIME_gen_altspec:altspec
B_TIME:COMMUTERS;B_TIME_gen_altspec:generic
B_TIME:LUGGAGE;B_TIME_gen_altspec:altspec
B_TIME:no_seg;B_TIME_gen_altspec:generic
B_TIME:no_seg;B_TIME_gen_altspec:altspec
B_TIME:LUGGAGE;B_TIME_gen_altspec:generic
Maximum two segmentations.
# Same call as the one-segmentation variant except for maximum_number=2.
# The "B_TIME" controller gains the combined COMMUTERS-LUGGAGE option, so the
# listing printed below has 4 x 2 = 8 configurations.
(B_TIME_catalog_dict,) = generic_alt_specific_catalogs(
generic_name='B_TIME',
beta_parameters=[B_TIME],
alternatives=['TRAIN', 'CAR'],
potential_segmentations=(
segmentation_purpose,
segmentation_luggage,
),
maximum_number=2,
)
# Only the TRAIN entry is inspected here.
print_all_configurations(B_TIME_catalog_dict['TRAIN'])
Total: 8 configurations
B_TIME:COMMUTERS;B_TIME_gen_altspec:altspec
B_TIME:COMMUTERS;B_TIME_gen_altspec:generic
B_TIME:COMMUTERS-LUGGAGE;B_TIME_gen_altspec:altspec
B_TIME:LUGGAGE;B_TIME_gen_altspec:altspec
B_TIME:no_seg;B_TIME_gen_altspec:generic
B_TIME:COMMUTERS-LUGGAGE;B_TIME_gen_altspec:generic
B_TIME:no_seg;B_TIME_gen_altspec:altspec
B_TIME:LUGGAGE;B_TIME_gen_altspec:generic
Total running time of the script: (0 minutes 0.383 seconds)