# Source code for biogeme.idmanager

"""Combine several arithmetic expressions and a database to obtain formulas

:author: Michel Bierlaire
:date: Sat Jul 30 12:36:40 2022
"""
import logging
from typing import NamedTuple, Dict, List
import biogeme.exceptions as excep
from biogeme.elementary_expressions import TypeOfElementaryExpression

class ElementsTuple(NamedTuple):
    """Group of elementary expressions together with their numbering.

    ``expressions`` maps each name to its expression object, ``indices``
    maps each name to its position in ``names``, and ``names`` is the
    ordered list of names. ``IdManager.prepare`` stores ``None`` in
    ``expressions`` (and in ``indices`` when there is no database) for
    groups that carry no expression objects.
    """

    expressions: Dict[str, 'Expression']
    indices: Dict[str, int]
    names: List[str]
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
class IdManager:
    """Class managing the ids of the elementary expressions found in a
    list of arithmetic expressions.

    The numbering is shared by all the expressions, so that the same
    elementary expression has the same index everywhere.
    """

    def __init__(self, expressions, database, number_of_draws):
        """Ctor

        :param expressions: list of expressions
        :type expressions: list(biogeme.expressions.Expression)

        :param database: database with the variables as column names
        :type database: biogeme.database.Database

        :param number_of_draws: number of draws for Monte-Carlo integration
        :type number_of_draws: int

        :raises BiogemeError: if an expression contains a variable and
            no database is provided, or if :meth:`prepare` fails.
        """
        self.expressions = expressions
        self.database = database
        self.number_of_draws = number_of_draws
        self.elementary_expressions = None
        self.free_betas = None
        self.free_betas_values = None
        self.number_of_free_betas = 0
        self.fixed_betas = None
        self.fixed_betas_values = None
        self.bounds = None
        self.random_variables = None
        self.draws = None
        self.variables = None
        self.requires_draws = False
        for f in self.expressions:
            the_variables = f.set_of_elementary_expression(
                the_type=TypeOfElementaryExpression.VARIABLE
            )
            if the_variables and database is None:
                raise excep.BiogemeError(
                    f'No database is provided and an expression '
                    f'contains variables: {the_variables}'
                )
            if f.embedExpression('MonteCarlo') or f.embedExpression('bioDraws'):
                self.requires_draws = True
        self.prepare()

    def __str__(self):
        return str(self.elementary_expressions.indices)

    def __repr__(self):
        return str(self.elementary_expressions.indices)

    def __eq__(self, other):
        """Two managers are equal when their global numbering is equal.

        :param other: object to compare with
        :type other: object

        :return: result of comparing the ``elementary_expressions`` of
            both objects, or ``NotImplemented`` if ``other`` is not an
            IdManager.
        :rtype: bool
        """
        if not isinstance(other, IdManager):
            # Let Python fall back to its default comparison instead of
            # failing with an AttributeError on a foreign object.
            return NotImplemented
        return self.elementary_expressions == other.elementary_expressions

    def audit(self):
        """Performs various checks on the expressions.

        :return: tuple listOfErrors, listOfWarnings
        :rtype: list(string), list(string)
        """
        list_of_errors = []
        list_of_warnings = []
        # The constructor accepts database=None: nothing to check then.
        if self.database is None or not self.database.isPanel():
            return list_of_errors, list_of_warnings

        # self.expressions is a list, so the check is performed on each
        # expression and the results are merged.
        # NOTE(review): assumes each expression provides
        # dictOfVariablesOutsidePanelTrajectory() -- confirm.
        dict_of_variables = {}
        for f in self.expressions:
            outside = f.dictOfVariablesOutsidePanelTrajectory()
            if outside:
                dict_of_variables.update(outside)

        if dict_of_variables:
            err_msg = (
                f'Error in the loglikelihood function. '
                f'Some variables are not inside '
                f'PanelLikelihoodTrajectory: '
                f'{dict_of_variables.keys()} .'
                f'If the database is organized as panel data, '
                f'all variables must be used inside a '
                f'PanelLikelihoodTrajectory. '
                f'If it is not consistent with your model, '
                f'generate a flat '
                f'version of the data using the function '
                f'`generateFlatPanelDataframe`.'
            )
            list_of_errors.append(err_msg)
        return list_of_errors, list_of_warnings

    def change_init_values(self, betas):
        """Modifies the values of the parameters

        :param betas: dictionary where the keys are the names of the
            parameters, and the values are the new value for the
            parameters. A parameter that is absent (or mapped to None)
            keeps its initial value.
        :type betas: dict(string:float)
        """

        def get_value(name):
            v = betas.get(name)
            if v is None:
                return self.free_betas.expressions[name].initValue
            return v

        self.free_betas_values = [get_value(x) for x in self.free_betas.names]

    def expressions_names_indices(self, dict_of_elements):
        """Assigns consecutive indices to expressions

        :param dict_of_elements: dictionary of expressions. The keys
            are the names.
        :type dict_of_elements: dict(str: biogeme.expressions.Expression)

        :return: a tuple with the original dictionary, the indices,
            and the sorted names.
        :rtype: ElementsTuple
        """
        names = sorted(dict_of_elements)
        indices = {v: i for i, v in enumerate(names)}
        return ElementsTuple(
            expressions=dict_of_elements, indices=indices, names=names
        )

    def _dict_of_elementary_expressions(self, the_type):
        """Merge, over all expressions, the elementary expressions of a type.

        :param the_type: type of elementary expression to extract
        :type the_type: TypeOfElementaryExpression

        :return: dictionary mapping names to elementary expressions. If
            a name appears in several expressions, the last one wins.
        :rtype: dict(str: biogeme.expressions.Expression)
        """
        merged = {}
        for f in self.expressions:
            merged.update(f.dict_of_elementary_expression(the_type=the_type))
        return merged

    def prepare(self):
        """Extract from the formulas the literals (parameters,
        variables, random variables) and decide a numbering convention.

        The numbering is done in the following order:

        (i) free betas,
        (ii) fixed betas,
        (iii) random variables for numerical integration,
        (iv) random variables for Monte-Carlo integration,
        (v) variables

        The numbering convention will be performed for all expressions
        together, so that the same elementary expressions in several
        expressions will have the same index.

        :raises BiogemeError: if an elementary expression is defined
            more than once, or if draws are required and no database
            is provided.
        """
        # Free parameters (to be estimated), sorted by alphabetical order
        self.free_betas = self.expressions_names_indices(
            self._dict_of_elementary_expressions(
                TypeOfElementaryExpression.FREE_BETA
            )
        )
        self.bounds = [
            (
                self.free_betas.expressions[b].lb,
                self.free_betas.expressions[b].ub,
            )
            for b in self.free_betas.names
        ]
        self.number_of_free_betas = len(self.free_betas.names)

        # Fixed parameters (not to be estimated), sorted by
        # alphabetical order.
        self.fixed_betas = self.expressions_names_indices(
            self._dict_of_elementary_expressions(
                TypeOfElementaryExpression.FIXED_BETA
            )
        )

        # Random variables for numerical integration
        self.random_variables = self.expressions_names_indices(
            self._dict_of_elementary_expressions(
                TypeOfElementaryExpression.RANDOM_VARIABLE
            )
        )

        # Draws
        self.draws = self.expressions_names_indices(
            self._dict_of_elementary_expressions(
                TypeOfElementaryExpression.DRAWS
            )
        )

        # Variables
        # Here, we do not extract the variables from the
        # formulas. Instead, we use all the variables in the database.
        if self.database is not None:
            # NOTE(review): the argument of this call was missing from
            # the reviewed copy of the file. The column names of the
            # database are assumed here -- confirm against the upstream
            # source.
            variables_names = list(self.database.data.columns.values)
            variables_indices = {v: i for i, v in enumerate(variables_names)}
            self.variables = ElementsTuple(
                expressions=None,
                indices=variables_indices,
                names=variables_names,
            )
        else:
            self.variables = ElementsTuple(expressions=None, indices=None, names=[])

        # Merge all the names
        elementary_expressions_names = (
            self.free_betas.names
            + self.fixed_betas.names
            + self.random_variables.names
            + self.draws.names
            + self.variables.names
        )

        # A name may not be used by two different elementary expressions.
        seen = set()
        duplicates = set()
        for name in elementary_expressions_names:
            if name in seen:
                duplicates.add(name)
            seen.add(name)
        if duplicates:
            error_msg = (
                f'The following elementary expressions are defined '
                f'more than once: {duplicates}.'
            )
            raise excep.BiogemeError(error_msg)

        elementary_expressions_indices = {
            v: i for i, v in enumerate(elementary_expressions_names)
        }
        self.elementary_expressions = ElementsTuple(
            expressions=None,
            indices=elementary_expressions_indices,
            names=elementary_expressions_names,
        )

        self.free_betas_values = [
            self.free_betas.expressions[x].initValue for x in self.free_betas.names
        ]
        self.fixed_betas_values = [
            self.fixed_betas.expressions[x].initValue for x in self.fixed_betas.names
        ]

        if self.requires_draws:
            if self.database is None:
                # The draws are generated by the database: fail with a
                # clear message rather than an AttributeError on None.
                raise excep.BiogemeError(
                    'No database is provided and an expression requires draws.'
                )
            self.database.generateDraws(
                self.draws.expressions, self.draws.names, self.number_of_draws
            )

    def setDataMap(self, sample):
        """Specify the map of the panel data in the expressions

        :param sample: map of the panel data (see
            :func:`biogeme.database.Database.buildPanelMap`)
        :type sample: pandas.DataFrame
        """
        for f in self.expressions:
            f.cpp.setDataMap(sample)

    def setData(self, sample):
        """Specify the sample

        :param sample: the sample, forwarded as is to the ``cpp``
            attribute of each expression.
        :type sample: pandas.DataFrame
        """
        for f in self.expressions:
            f.cpp.setData(sample)