Source code for biogeme.model_elements.audit

import pandas as pd

from biogeme.audit_tuple import AuditTuple
from biogeme.database import Database
from biogeme.default_parameters import MISSING_VALUE
from biogeme.expressions import (
    Expression,
    ExpressionOrNumeric,
    PanelLikelihoodTrajectory,
    Variable,
    list_of_variables_in_expression,
)

CHOICE_LABEL = 'Choice'
AVAILABILITY_LABEL = 'Avail. '


[docs] def audit_variables(expression: Expression, database: Database) -> AuditTuple: all_variables: list[Variable] = list_of_variables_in_expression(expression) list_of_errors = [] list_of_warnings = [] for variable in all_variables: if variable.name not in database.dataframe.columns: error_msg = f'Variable "{variable.name}" not found in the database. Available variables: {database.dataframe.columns}' list_of_errors.append(error_msg) return AuditTuple(errors=list_of_errors, warnings=list_of_warnings)
[docs] def audit_panel(expression: Expression, database: Database) -> AuditTuple: list_of_errors = [] list_of_warnings = [] if not database.is_panel(): return AuditTuple(errors=list_of_errors, warnings=list_of_warnings) all_variables: list[Variable] = list_of_variables_in_expression(expression) if all_variables and not expression.embed_expression(PanelLikelihoodTrajectory): error_msg = ( f'Expression {expression} does not contain "PanelLikelihoodTrajectory" although the data has been ' f'declared to have a panel structure.' ) list_of_errors.append(error_msg) return AuditTuple(errors=list_of_errors, warnings=list_of_warnings)
[docs] def audit_chosen_alternative( choice: ExpressionOrNumeric, availability: dict[int, ExpressionOrNumeric], database: Database, use_jit: bool, ) -> AuditTuple: from .model_elements import ModelElements from biogeme.calculator import MultiRowEvaluator """Checks all the rows in the database such that the chosen alternative is not available""" list_of_errors = [] dict_of_expressions = {CHOICE_LABEL: choice} | { f'{AVAILABILITY_LABEL}{alt_id:.1f}': the_expression for alt_id, the_expression in availability.items() } model_elements = ModelElements( expressions=dict_of_expressions, database=database, use_jit=use_jit ) the_evaluator: MultiRowEvaluator = MultiRowEvaluator( model_elements=model_elements, numerically_safe=True, use_jit=use_jit ) results: pd.DataFrame = the_evaluator.evaluate({}) def chosen_unavailable(row): choice_id = row[CHOICE_LABEL] if choice_id == MISSING_VALUE: return False return row[f'{AVAILABILITY_LABEL}{choice_id:.1f}'] == 0 invalid_indices = results.apply(chosen_unavailable, axis=1) problematic_rows = results[invalid_indices] list_of_warnings = [ f'Row index {idx}: chosen alternative {row[CHOICE_LABEL]} is not available' for idx, row in problematic_rows.iterrows() ] return AuditTuple(errors=list_of_errors, warnings=list_of_warnings)