Source code for biogeme.model_elements.audit
import pandas as pd
from biogeme.audit_tuple import AuditTuple
from biogeme.database import Database
from biogeme.default_parameters import MISSING_VALUE
from biogeme.expressions import (
Expression,
ExpressionOrNumeric,
PanelLikelihoodTrajectory,
Variable,
list_of_variables_in_expression,
)
# Name of the entry (and resulting column) that holds the evaluated choice
# expression in audit_chosen_alternative.
CHOICE_LABEL = 'Choice'
# Prefix of the entries holding the evaluated availability expressions. The
# alternative id, formatted with one decimal, is appended to it, so the
# trailing space is part of the label.
AVAILABILITY_LABEL = 'Avail. '
[docs]
def audit_variables(expression: Expression, database: Database) -> AuditTuple:
    """Check that each variable used by the expression is a column of the database.

    :param expression: expression whose variables are inspected.
    :param database: database whose dataframe columns are the reference set.
    :return: audit tuple with one error per variable whose name is not a
        column of the dataframe. No warning is ever produced.
    """
    missing_columns = [
        f'Variable "{candidate.name}" not found in the database. Available variables: {database.dataframe.columns}'
        for candidate in list_of_variables_in_expression(expression)
        if candidate.name not in database.dataframe.columns
    ]
    return AuditTuple(errors=missing_columns, warnings=[])
[docs]
def audit_panel(expression: Expression, database: Database) -> AuditTuple:
    """Check that an expression is compatible with panel data.

    Nothing is checked when the database is not declared as panel. Otherwise,
    an expression that involves at least one variable must embed a
    ``PanelLikelihoodTrajectory``.

    :param expression: expression to audit.
    :param database: database, queried through ``is_panel()``.
    :return: audit tuple containing at most one error and no warning.
    """
    errors = []
    warnings = []
    if database.is_panel():
        variables_in_use: list[Variable] = list_of_variables_in_expression(expression)
        # The embedding test is only performed when variables are involved.
        if variables_in_use:
            if not expression.embed_expression(PanelLikelihoodTrajectory):
                errors.append(
                    f'Expression {expression} does not contain "PanelLikelihoodTrajectory" although the data has been '
                    f'declared to have a panel structure.'
                )
    return AuditTuple(errors=errors, warnings=warnings)
[docs]
def audit_chosen_alternative(
    choice: ExpressionOrNumeric,
    availability: dict[int, ExpressionOrNumeric],
    database: Database,
    use_jit: bool,
) -> AuditTuple:
    """Report the rows of the database where the chosen alternative is not available.

    The choice expression and every availability expression are evaluated on
    all rows. A row is reported when its choice is not ``MISSING_VALUE`` and
    either the availability of the chosen alternative evaluates to 0, or the
    chosen alternative has no entry in ``availability`` at all.

    :param choice: expression (or numeric value) giving the chosen alternative.
    :param availability: maps each alternative id to its availability
        expression (or numeric value).
    :param database: database on which the expressions are evaluated.
    :param use_jit: forwarded to ``ModelElements`` and ``MultiRowEvaluator``.
    :return: audit tuple with one warning per problematic row. The list of
        errors is always empty.
    """
    # Kept as function-level imports, as in the original code (presumably to
    # avoid a circular import at module load time -- to be confirmed).
    from .model_elements import ModelElements
    from biogeme.jax_calculator import MultiRowEvaluator

    list_of_errors = []
    # One entry for the choice, and one per alternative for its availability.
    dict_of_expressions = {CHOICE_LABEL: choice} | {
        f'{AVAILABILITY_LABEL}{alt_id:.1f}': the_expression
        for alt_id, the_expression in availability.items()
    }
    model_elements = ModelElements(
        expressions=dict_of_expressions, database=database, use_jit=use_jit
    )
    the_evaluator: MultiRowEvaluator = MultiRowEvaluator(
        model_elements=model_elements, numerically_safe=True, use_jit=use_jit
    )
    results: pd.DataFrame = the_evaluator.evaluate({})

    def chosen_unavailable(row) -> bool:
        """Return True if the alternative chosen in this row is not available."""
        choice_id = row[CHOICE_LABEL]
        if choice_id == MISSING_VALUE:
            return False
        availability_label = f'{AVAILABILITY_LABEL}{choice_id:.1f}'
        if availability_label not in row.index:
            # The chosen id matches no declared alternative (or is NaN):
            # report it as unavailable instead of failing with a KeyError.
            return True
        return row[availability_label] == 0

    invalid_indices = results.apply(chosen_unavailable, axis=1)
    problematic_rows = results[invalid_indices]
    list_of_warnings = [
        f'Row index {idx}: chosen alternative {row[CHOICE_LABEL]} is not available'
        for idx, row in problematic_rows.iterrows()
    ]
    return AuditTuple(errors=list_of_errors, warnings=list_of_warnings)