# Source code for biogeme.segmentation.segmentation
"""Class that provides some automatic specification for segmented parameters
Michel Bierlaire
Thu Apr 3 12:07:44 2025
"""
from __future__ import annotations
from collections.abc import Iterable
from itertools import product
from typing import NamedTuple, TYPE_CHECKING
import pandas as pd
from .one_segmentation import OneSegmentation
from .segmentation_context import DiscreteSegmentationTuple
from biogeme.exceptions import BiogemeError
from biogeme.results_processing import EstimationResults
if TYPE_CHECKING:
from biogeme.expressions import Beta, Expression, MultipleSum
# [docs] -- Sphinx source-link marker left over from the HTML extraction; not part of the module.
class Segmentation:
    """Segmentation of a parameter, possibly with multiple socio-economic variables.

    The segmented parameter combines, through ``MultipleSum``, a reference
    parameter (named ``<beta name>_ref``) with the terms contributed by each
    individual segmentation.
    """

    def __init__(
        self,
        beta: Beta,
        segmentation_tuples: Iterable[DiscreteSegmentationTuple],
        prefix: str = 'segmented',
    ):
        """Ctor

        :param beta: parameter to be segmented
        :param segmentation_tuples: characterization of the segmentations
        :param prefix: prefix to be used to generate the name of the
            segmented parameter
        :raise BiogemeError: if ``segmentation_tuples`` is empty
        """
        # Materialize first: the argument may be a one-shot iterator, and it
        # is both tested for emptiness and iterated over below.
        segmentation_tuples = tuple(segmentation_tuples)
        if not segmentation_tuples:
            raise BiogemeError('segmentation_tuples cannot be empty')
        self.beta: Beta = beta
        # One OneSegmentation object per segmentation tuple, in the order given.
        self.segmentations: tuple[OneSegmentation, ...] = tuple(
            OneSegmentation(beta, s) for s in segmentation_tuples
        )
        self.prefix = prefix

    def get_beta_ref_name(self) -> str:
        """Name of the reference parameter.

        :return: name of the segmented parameter followed by the suffix
            ``_ref``
        """
        return self.beta.name + '_ref'

    def beta_ref_code(self) -> str:
        """Constructs the Python code for the reference parameter

        :return: Python code of the form ``Beta('<name>_ref', init_value,
            lower_bound, upper_bound, status)``
        """
        beta_name = f"'{self.get_beta_ref_name()}'"
        return (
            f'Beta({beta_name}, {self.beta.init_value}, {self.beta.lower_bound}, '
            f'{self.beta.upper_bound}, {self.beta.status})'
        )

    def get_reference_beta(self) -> Beta:
        """Obtain the reference beta

        :return: a new ``Beta`` named as returned by
            :meth:`get_beta_ref_name`, with the same initial value, bounds
            and status as the segmented parameter
        """
        # Imported here rather than at module level (the module only imports
        # it under TYPE_CHECKING) -- presumably to avoid a circular import.
        from biogeme.expressions import Beta

        beta_name = self.get_beta_ref_name()
        return Beta(
            beta_name,
            self.beta.init_value,
            self.beta.lower_bound,
            self.beta.upper_bound,
            self.beta.status,
        )

    def segmented_beta(self) -> Expression:
        """Create an expression that combines all the segments

        :return: ``MultipleSum`` of the reference parameter followed by the
            expressions provided by each segmentation
        """
        from biogeme.expressions import MultipleSum

        ref_beta = self.get_reference_beta()
        terms = [ref_beta]
        terms += [
            element for s in self.segmentations for element in s.list_of_expressions()
        ]
        return MultipleSum(terms)

    def segmented_code(self) -> str:
        """Create the Python code for an expression that combines all the segments

        The code first contains one assignment line per category of each
        segmentation's ``mapping``. The last line is either the
        ``bioMultSum`` assignment of the segmented parameter or, when no
        segmentation contributes a term, the code of the reference parameter
        alone (without assignment).

        :return: Python code for the combined expression
        """
        result = '\n'.join(
            [
                s.beta_code(c, assignment=True)
                for s in self.segmentations
                for c in s.mapping.values()
            ]
        )
        result += '\n'
        terms = [self.beta_ref_code()]
        terms += [element for s in self.segmentations for element in s.list_of_code()]
        if len(terms) == 1:
            # Only the reference parameter: nothing to sum.
            result += terms[0]
        else:
            joined_terms = ', '.join(terms)
            result += f'{self.prefix}_{self.beta.name} = bioMultSum([{joined_terms}])'
        return result

    def calculates_estimated_values(
        self, estimation_results: EstimationResults
    ) -> pd.DataFrame:
        """Calculates the estimated values of the parameter for each segment.

        :param estimation_results: results of the estimation
        :return: a pandas data frame with one row per combination of
            categories (one category per segmentation). Each row contains
            one column per segmentation variable and a column
            ``'parameter estimate'`` with the corresponding value of the
            coefficient.
        :raise KeyError: if a required parameter is absent from the
            estimated values
        """

        class SegmentationValue(NamedTuple):
            """One category of one segmentation."""

            segmentation: OneSegmentation
            value: str

        # For each segmentation, the list of all its categories, taken from
        # the mapping of its segmentation tuple.
        all_segmentations = [
            [
                SegmentationValue(segment, value)
                for value in segment.segmentation_tuple.mapping.values()
            ]
            for segment in self.segmentations
        ]
        beta_values = estimation_results.get_beta_values()
        # Only the name is needed: no need to build a Beta expression.
        ref_beta_name = self.get_beta_ref_name()
        ref_value = beta_values[ref_beta_name]
        list_of_rows = []
        # The Cartesian product generates all combinations of categories.
        for combination in product(*all_segmentations):
            the_row = {
                element.segmentation.variable.name: element.value
                for element in combination
            }
            estimate = ref_value
            for element in combination:
                the_name = element.segmentation.beta_name(category=element.value)
                # A category whose parameter name is the reference name adds
                # nothing: the reference value is already accounted for.
                if the_name != ref_beta_name:
                    estimate += beta_values[the_name]
            the_row['parameter estimate'] = estimate
            list_of_rows.append(the_row)
        return pd.DataFrame(list_of_rows)