# Source code for biogeme.expressions.idmanager

"""Combine several arithmetic expressions and a database to obtain formulas

:author: Michel Bierlaire
:date: Sat Jul 30 12:36:40 2022
"""
import logging
from typing import NamedTuple, Dict, List, TYPE_CHECKING
import biogeme.exceptions as excep

if TYPE_CHECKING:
    from .base_expressions import Expression

from .elementary_types import TypeOfElementaryExpression



[docs]
class ElementsTuple(NamedTuple):
    """Group of elementary expressions together with their numbering.

    Instances are built by ``IdManager.expressions_names_indices`` and
    ``IdManager.prepare``.
    """

    # Expressions, keyed by name. ``IdManager.prepare`` sets it to None
    # for the database variables and for the merged numbering.
    expressions: Dict[str, 'Expression']
    # Index associated with each name. ``IdManager.prepare`` sets it to
    # None for the variables when no database is available.
    indices: Dict[str, int]
    # Names, listed in the order that defines the indices.
    names: List[str]



logger = logging.getLogger(__name__)



[docs]
class IdManager:
    """Class managing the ids (indices) of the elementary expressions
    involved in a list of arithmetic expressions.
    """


[docs]
    def __init__(self, expressions, database, number_of_draws):
        """Ctor

        Stores the arguments, verifies that a database is available as
        soon as one expression involves variables, detects if draws are
        needed, and numbers the elementary expressions with
        :meth:`prepare`.

        :param expressions: list of expressions
        :type expressions: list(biogeme.expressions.Expression)

        :param database: database with the variables as column names
        :type database: biogeme.database.Database

        :param number_of_draws: number of draws for Monte-Carlo integration
        :type number_of_draws: int

        :raises BiogemeError: if an expression contains a variable and
            no database is provided.

        """
        # Arguments of the constructor
        self.expressions = expressions
        self.database = database
        self.number_of_draws = number_of_draws

        # Quantities defined by prepare()
        self.elementary_expressions = None
        self.free_betas = None
        self.fixed_betas = None
        self.random_variables = None
        self.draws = None
        self.variables = None
        self.free_betas_values = None
        self.fixed_betas_values = None
        self.bounds = None
        self.number_of_free_betas = 0

        # Becomes True if at least one expression embeds an expression
        # named 'MonteCarlo' or 'bioDraws'
        self.requires_draws = False

        expressions_involving_draws = ('MonteCarlo', 'bioDraws')
        for expression in self.expressions:
            used_variables = expression.set_of_elementary_expression(
                the_type=TypeOfElementaryExpression.VARIABLE
            )
            if used_variables and database is None:
                raise excep.BiogemeError(
                    f'No database is provided and an expression '
                    f'contains variables: {used_variables}'
                )
            if any(
                expression.embedExpression(name)
                for name in expressions_involving_draws
            ):
                self.requires_draws = True

        self.prepare()


    def __str__(self):
        return str(self.elementary_expressions.indices)

    def __repr__(self):
        return str(self.elementary_expressions.indices)


[docs]
    def __eq__(self, other):
        return self.elementary_expressions == other.elementary_expressions



[docs]
    def audit(self):
        """Performs various checks on the expressions.

        :return: tuple listOfErrors, listOfWarnings
        :rtype: list(string), list(string)
        """
        list_of_errors = []
        list_of_warnings = []
        if self.database.isPanel():
            dict_of_variables = self.expressions.dictOfVariablesOutsidePanelTrajectory()
            if dict_of_variables:
                err_msg = (
                    f'Error in the loglikelihood function. '
                    f'Some variables are not inside '
                    f'PanelLikelihoodTrajectory: '
                    f'{dict_of_variables.keys()} .'
                    f'If the database is organized as panel data, '
                    f'all variables must be used inside a '
                    f'PanelLikelihoodTrajectory. '
                    f'If it is not consistent with your model, '
                    f'generate a flat '
                    f'version of the data using the function '
                    f'`generateFlatPanelDataframe`.'
                )
                list_of_errors.append(err_msg)
        return list_of_errors, list_of_warnings



[docs]
    def change_init_values(self, betas):
        """Modifies the values of the pameters

        :param betas: dictionary where the keys are the names of the
                      parameters, and the values are the new value for
                      the parameters.
        :type betas: dict(string:float)
        """

        def get_value(name):
            v = betas.get(name)
            if v is None:
                return self.free_betas.expressions[name].initValue
            return v

        self.free_betas_values = [get_value(x) for x in self.free_betas.names]



[docs]
    def expressions_names_indices(self, dict_of_elements):
        """Assigns consecutive indices to expressions

        The names are sorted, and the index of a name is its position
        in the sorted list.

        :param dict_of_elements: dictionary of expressions. The keys
            are the names.
        :type dict_of_elements: dict(str: biogeme.expressions.Expression)

        :return: a tuple with the original dictionary, the indices,
            and the sorted names.
        :rtype: ElementsTuple
        """
        names = sorted(dict_of_elements)
        indices = {name: index for index, name in enumerate(names)}
        return ElementsTuple(expressions=dict_of_elements, indices=indices, names=names)



[docs]
    def prepare(self):
        """Extract from the formulas the literals (parameters,
        variables, random variables) and decide a numbering convention.

        The numbering is done in the following order:

        (i) free betas,
        (ii) fixed betas,
        (iii) random variables for numerical integration,
        (iv) random variables for Monte-Carlo integration,
        (v) variables

        The numbering convention will be performed for all expressions
        together, so that the same elementary expressions in several
        expressions will have the same index.

        If draws are required, they are generated by the database at
        the end of the procedure.

        :raises BiogemeError: if the same name is used by more than one
            elementary expression.
        """

        # Free parameters (to be estimated), sorted by alphabetical order
        self.free_betas = self._index_elementary_expressions(
            TypeOfElementaryExpression.FREE_BETA
        )
        self.bounds = [
            (
                self.free_betas.expressions[b].lb,
                self.free_betas.expressions[b].ub,
            )
            for b in self.free_betas.names
        ]
        self.number_of_free_betas = len(self.free_betas.names)

        # Fixed parameters (not to be estimated), sorted by alphabetical order.
        self.fixed_betas = self._index_elementary_expressions(
            TypeOfElementaryExpression.FIXED_BETA
        )

        # Random variables for numerical integration
        self.random_variables = self._index_elementary_expressions(
            TypeOfElementaryExpression.RANDOM_VARIABLE
        )

        # Draws
        self.draws = self._index_elementary_expressions(
            TypeOfElementaryExpression.DRAWS
        )

        # Variables
        # Here, we do not extract the variables from the
        # formulas. Instead, we use all the variables in the database.
        if self.database is not None:
            variables_names = list(self.database.data.columns.values)
            self.variables = ElementsTuple(
                expressions=None,
                indices={v: i for i, v in enumerate(variables_names)},
                names=variables_names,
            )
        else:
            self.variables = ElementsTuple(expressions=None, indices=None, names=[])

        # Merge all the names
        elementary_expressions_names = (
            self.free_betas.names
            + self.fixed_betas.names
            + self.random_variables.names
            + self.draws.names
            + self.variables.names
        )

        # A name must identify only one elementary expression. The
        # duplicates are identified in a single pass over the names.
        already_seen = set()
        duplicates = set()
        for name in elementary_expressions_names:
            if name in already_seen:
                duplicates.add(name)
            else:
                already_seen.add(name)
        if duplicates:
            error_msg = (
                f'The following elementary expressions are defined '
                f'more than once: {duplicates}.'
            )
            raise excep.BiogemeError(error_msg)

        elementary_expressions_indices = {
            v: i for i, v in enumerate(elementary_expressions_names)
        }

        self.elementary_expressions = ElementsTuple(
            expressions=None,
            indices=elementary_expressions_indices,
            names=elementary_expressions_names,
        )

        self.free_betas_values = [
            self.free_betas.expressions[x].initValue for x in self.free_betas.names
        ]
        self.fixed_betas_values = [
            self.fixed_betas.expressions[x].initValue for x in self.fixed_betas.names
        ]

        if self.requires_draws:
            # NOTE(review): the constructor accepts database=None when no
            # expression contains variables. If draws are required in
            # that case, this call fails -- confirm the intended behavior.
            self.database.generateDraws(
                self.draws.expressions, self.draws.names, self.number_of_draws
            )

    def _index_elementary_expressions(self, the_type):
        """Gather the elementary expressions of a given type from all
        expressions, and number them.

        :param the_type: type of elementary expressions to extract
        :type the_type: TypeOfElementaryExpression

        :return: the expressions, their indices and their sorted names.
        :rtype: ElementsTuple
        """
        merged = {}
        for f in self.expressions:
            # If a name appears in several expressions, the last one is kept.
            merged.update(f.dict_of_elementary_expression(the_type=the_type))
        return self.expressions_names_indices(merged)



[docs]
    def setDataMap(self, sample):
        """Specify the map of the panel data in the expressions

        :param sample: map of the panel data (see
            :func:`biogeme.database.Database.buildPanelMap`)
        :type sample: pandas.DataFrame
        """
        for f in self.expressions:
            f.cpp.setDataMap(sample)



[docs]
    def setData(self, sample):
        """Specify the sample

        :param sample: map of the panel data (see
            :func:`biogeme.database.Database.buildPanelMap`)
        :type sample: pandas.DataFrame

        """
        for f in self.expressions:
            f.cpp.setData(sample)