# Source code for biogeme.validation.cross_validation
import logging
from dataclasses import dataclass
import pandas as pd
from biogeme.calculator import CompiledFormulaEvaluator, MultiRowEvaluator
from biogeme.default_parameters import ParameterValue
from biogeme.likelihood import AlgorithmResults, model_estimation
from biogeme.model_elements import ModelElements
from biogeme.optimization import OptimizationAlgorithm
from .split_databases import EstimationValidationModels, split_databases
logger = logging.getLogger(__name__)
# [docs]
@dataclass
class ValidationResult:
    """Outcome of a single estimation/validation split.

    One instance is created per fold by ``cross_validate_model``.

    :param estimation_modeling_elements: model elements of the estimation
        part of the fold (the part the parameters were estimated on).
    :param validation_modeling_elements: model elements of the validation
        part of the fold (the part the simulation was run on).
    :param simulated_values: data frame returned by the simulation on the
        validation part, using the parameter values estimated on the
        estimation part.
    """

    estimation_modeling_elements: ModelElements
    validation_modeling_elements: ModelElements
    simulated_values: pd.DataFrame
# [docs]
def cross_validate_model(
    the_algorithm: OptimizationAlgorithm,
    modeling_elements: ModelElements,
    parameters: dict[str, ParameterValue],
    starting_values: dict[str, float],
    slices: int,
    numerically_safe: bool,
    groups: str | None = None,
) -> list[ValidationResult]:
    """Estimate and validate a model on each estimation/validation split.

    The model elements are split by ``split_databases``. For every resulting
    fold, the model is estimated on the estimation part, and the estimated
    parameter values are then used to evaluate the model on the validation
    part.

    :param the_algorithm: optimization algorithm forwarded to
        ``model_estimation``.
    :param modeling_elements: model elements handed to ``split_databases``;
        its ``use_jit`` attribute is also forwarded to the simulation
        evaluator.
    :param parameters: parameter values forwarded to ``model_estimation``.
        The entry ``'calculating_second_derivatives'`` is read for each fold
        and must therefore be present.
    :param starting_values: starting values passed unchanged to
        ``model_estimation`` for every fold.
    :param slices: forwarded to ``split_databases`` (presumably the number of
        folds -- confirm against ``split_databases``).
    :param numerically_safe: forwarded to both the estimation and the
        simulation evaluators.
    :param groups: forwarded to ``split_databases`` (presumably the name of a
        grouping column -- confirm against ``split_databases``).
    :return: one ``ValidationResult`` per fold, in the order the folds are
        produced by ``split_databases``.
    """
    validation_models: list[EstimationValidationModels] = split_databases(
        model_elements=modeling_elements, slices=slices, groups=groups
    )

    results: list[ValidationResult] = []
    for fold in validation_models:
        # Estimation phase: fit the model on the estimation part of the fold.
        the_function_evaluator = CompiledFormulaEvaluator(
            model_elements=fold.estimation,
            second_derivatives_mode=parameters['calculating_second_derivatives'],
            numerically_safe=numerically_safe,
        )
        one_result: AlgorithmResults = model_estimation(
            the_algorithm=the_algorithm,
            function_evaluator=the_function_evaluator,
            parameters=parameters,
            some_starting_values=starting_values,
            save_iterations_filename=None,
        )
        # Turn the raw solution into named parameter values.
        estimated_betas = fold.estimation.expressions_registry.get_named_betas_values(
            values=one_result.solution
        )

        # Validation phase: evaluate the validation part of the fold with the
        # parameter values estimated above.
        simulation_evaluator = MultiRowEvaluator(
            model_elements=fold.validation,
            numerically_safe=numerically_safe,
            use_jit=modeling_elements.use_jit,
        )
        simulated_values: pd.DataFrame = simulation_evaluator.evaluate(
            the_betas=estimated_betas
        )

        results.append(
            ValidationResult(
                estimation_modeling_elements=fold.estimation,
                validation_modeling_elements=fold.validation,
                simulated_values=simulated_values,
            )
        )
    return results