# Source code for biogeme.results_processing.compilation

"""
Compilation of estimation results

Michel Bierlaire
Thu Oct 3 18:54:13 2024
"""

import glob
import logging
import os

import pandas as pd

from biogeme.results_processing.estimation_results import (
    EstimationResults,
    EstimateVarianceCovariance,
)
from biogeme.tools import ModelNames

logger = logging.getLogger(__name__)


def compile_estimation_results(
    dict_of_results: dict[str, EstimationResults | str],
    variance_covariance_type: EstimateVarianceCovariance = EstimateVarianceCovariance.ROBUST,
    statistics: tuple[str, ...] = (
        'Number of estimated parameters',
        'Sample size',
        'Final log likelihood',
        'Akaike Information Criterion',
        'Bayesian Information Criterion',
    ),
    include_parameter_estimates: bool = True,
    include_stderr: bool = False,
    include_t_test: bool = True,
    formatted: bool = True,
    use_short_names: bool = False,
) -> tuple[pd.DataFrame, dict[str, str]]:
    """Compile estimation results into a common table.

    The table has one column per model. Its rows are the requested
    statistics, followed (if requested) by the parameters of the models.

    :param dict_of_results: dict mapping the name of each model to its
        estimation results, or to the name of the YAML file containing
        them. If such a file cannot be found, a warning is logged and the
        column of the model is left empty.

    :param variance_covariance_type: type of variance-covariance estimate
        to be used for the standard errors and the t-tests.

    :param statistics: names of the general statistics to include in the
        summary table. Each name must be a key of the dict returned by
        ``get_general_statistics``.

    :param include_parameter_estimates: if True, the parameter estimates
        are included.

    :param include_stderr: if True, the standard errors of the parameters
        are included.

    :param include_t_test: if True, the t-tests of the parameters are
        included.

    :param formatted: if True, a formatted string is included in the
        table of results (one row per parameter). If False, the numerical
        values are stored (one row per reported quantity). Use "True" if
        you need to print the results. Use "False" if you need to use them
        for further calculation.

    :param use_short_names: if True, short names generated by
        ``ModelNames`` are used to identify the models. It is nicer for
        the reporting.

    :return: pandas dataframe with the requested results (missing entries
        are replaced by empty strings), and a dictionary mapping the name
        of each column to the original name of the model.
    """
    model_names = ModelNames()

    def the_name(col: str) -> str:
        """Replace the name of a model by a shorter version for reporting

        :param col: name of the column, that is, name of the model.
        :return: name to be used in the reporting.
        """
        if use_short_names:
            return model_names(col)
        return col

    columns = [the_name(k) for k in dict_of_results]
    df = pd.DataFrame(columns=columns)
    configurations = {the_name(col): col for col in dict_of_results}

    # The suffixes of the row titles used in formatted mode do not depend
    # on the model or the parameter: build them once.
    row_std = ' (std)' if include_stderr else ''
    row_t_test = ' (t-test)' if include_t_test else ''

    for model, estimation_results in dict_of_results.items():
        col = the_name(model)

        if not isinstance(estimation_results, EstimationResults):
            # The entry is a file name: load the results from it.
            try:
                estimation_results = EstimationResults.from_yaml_file(
                    filename=estimation_results
                )
            except FileNotFoundError:
                warning = f'Impossible to access result file {estimation_results}'
                logger.warning(warning)
                # Best effort: the column of this model stays empty.
                continue

        stats_results = estimation_results.get_general_statistics()
        for s in statistics:
            df.loc[s, col] = stats_results[s]

        if not include_parameter_estimates:
            continue

        for parameter_index, parameter_name in enumerate(
            estimation_results.beta_names
        ):
            parameter_value = estimation_results.get_parameter_value_from_index(
                parameter_index=parameter_index
            )
            # Standard errors and t-tests are retrieved only when they are
            # reported, so that a quantity that is not requested can neither
            # cost time nor make the compilation fail.
            std_err_value = (
                estimation_results.get_parameter_std_err_from_index(
                    parameter_index=parameter_index,
                    estimate_var_covar=variance_covariance_type,
                )
                if include_stderr
                else None
            )
            t_test_value = (
                estimation_results.get_parameter_t_test_from_index(
                    parameter_index=parameter_index,
                    estimate_var_covar=variance_covariance_type,
                )
                if include_t_test
                else None
            )

            if formatted:
                std_err_report = f'({std_err_value:.3g})' if include_stderr else ''
                t_test_report = f'({t_test_value:.3g})' if include_t_test else ''
                # The separating spaces are kept even when a report is empty,
                # so that the generated strings are unchanged.
                the_value = f'{parameter_value:.3g} {std_err_report} {t_test_report}'
                row_title = f'{parameter_name}{row_std}{row_t_test}'
                df.loc[row_title, col] = the_value
            else:
                df.loc[parameter_name, col] = parameter_value
                if include_stderr:
                    df.loc[f'{parameter_name} (std)', col] = std_err_value
                if include_t_test:
                    df.loc[f'{parameter_name} (t-test)', col] = t_test_value

    return df.fillna(''), configurations
def compile_results_in_directory(
    statistics: tuple[str, ...] = (
        'Number of estimated parameters',
        'Sample size',
        'Final log likelihood',
        'Akaike Information Criterion',
        'Bayesian Information Criterion',
    ),
    file_extension: str = 'yaml',
    variance_covariance_type: EstimateVarianceCovariance = EstimateVarianceCovariance.ROBUST,
    include_parameter_estimates: bool = True,
    include_stderr: bool = False,
    include_t_test: bool = True,
    formatted: bool = True,
    use_short_names: bool = False,
) -> tuple[pd.DataFrame, dict[str, str]] | None:
    """Compile estimation results found in the local directory into a
    common table.

    Every file of the current working directory with the requested
    extension is considered to contain estimation results, and is
    identified in the table by its file name. The files are processed in
    alphabetical order.

    :param statistics: names of the general statistics to include in the
        summary table

    :param file_extension: extension (without the leading dot) of the
        files containing the estimation results.

    :param variance_covariance_type: type of variance-covariance estimate
        to be used.

    :param include_parameter_estimates: if True, the parameter estimates
        are included.

    :param include_stderr: if True, the standard errors of the parameters
        are included.

    :param include_t_test: if True, the t-tests of the parameters are
        included.

    :param formatted: if True, a formatted string is included in the
        table of results. If False, the numerical values are stored. Use
        "True" if you need to print the results. Use "False" if you need to
        use them for further calculation.

    :param use_short_names: if True, short names are used to identify the
        models. It is nicer for the reporting.

    :return: pandas dataframe with the requested results, and a
        dictionary mapping the name of each column to the name of the
        corresponding file. If no file with the requested extension is
        found, a warning is logged and None is returned.
    """
    # glob returns the files in arbitrary order. Sorting them makes the
    # order of the columns, and the numbering of the short names,
    # reproducible from one run or platform to the next.
    files = sorted(glob.glob(f'*.{file_extension}'))
    if not files:
        logger.warning(f'No .{file_extension} file found in {os.getcwd()}')
        return None

    # Each model is named after the file containing its results.
    the_dict = {k: k for k in files}
    return compile_estimation_results(
        dict_of_results=the_dict,
        variance_covariance_type=variance_covariance_type,
        statistics=statistics,
        include_parameter_estimates=include_parameter_estimates,
        include_stderr=include_stderr,
        include_t_test=include_t_test,
        formatted=formatted,
        use_short_names=use_short_names,
    )