Source code for biogeme.results_processing.pandas_output

"""
Generates the estimation results in Pandas

Michel Bierlaire
Wed Oct 2 06:43:33 2024
"""

import logging

import numpy as np
import pandas as pd

from .estimation_results import (
    EstimateVarianceCovariance,
    EstimationResults,
    calc_p_value,
    calculates_correlation_matrix,
)
from ..exceptions import BiogemeError

logger = logging.getLogger(__name__)


def get_pandas_one_parameter(
    estimation_results: EstimationResults,
    parameter_index: int,
    variance_covariance_type: EstimateVarianceCovariance,
    parameter_number: int | None = None,
    parameter_name: str | None = None,
) -> dict[str, float | int | str]:
    """Generate one row of the Pandas table of the estimated parameters.

    :param estimation_results: estimation results.
    :param parameter_index: index of the parameter in
        ``estimation_results.beta_names``.
    :param variance_covariance_type: type of variance-covariance estimate
        to be used. Its string representation prefixes the column names of
        the statistics.
    :param parameter_number: number of the parameter to report. If None, it
        is the index.
    :param parameter_name: name of the parameter to report. If None, taken
        from estimation results.
    :return: one row of the table, as a dict mapping column names to values.
        The 'Active bound' entry is present only when
        ``estimation_results.is_any_bound_active()`` is true.
    :raise ValueError: if ``parameter_index`` is out of range.
    """
    number_of_parameters = len(estimation_results.beta_names)
    if parameter_index < 0 or parameter_index >= number_of_parameters:
        error_msg = (
            f'Invalid parameter index {parameter_index}. Valid range: 0-'
            f' {number_of_parameters-1}'
        )
        raise ValueError(error_msg)

    if parameter_number is None:
        parameter_number = parameter_index
    if parameter_name is None:
        parameter_name = estimation_results.beta_names[parameter_index]

    covar_header = str(variance_covariance_type)
    value = estimation_results.get_parameter_value_from_index(
        parameter_index=parameter_index
    )

    # The three statistics all require the derivatives; without them they
    # are reported as NaN.
    if estimation_results.are_derivatives_available:
        std_err = estimation_results.get_parameter_std_err_from_index(
            parameter_index=parameter_index,
            estimate_var_covar=variance_covariance_type,
        )
        t_test = estimation_results.get_parameter_t_test_from_index(
            parameter_index=parameter_index,
            estimate_var_covar=variance_covariance_type,
            target=0,
        )
        p_value = estimation_results.get_parameter_p_value_from_index(
            parameter_index=parameter_index,
            estimate_var_covar=variance_covariance_type,
            target=0,
        )
    else:
        std_err = t_test = p_value = np.nan

    the_row = {
        '#': parameter_number,
        'Name': parameter_name,
        'Value': value,
        f'{covar_header} std err.': std_err,
        f'{covar_header} t-stat.': t_test,
        f'{covar_header} p-value': p_value,
    }
    if estimation_results.is_any_bound_active():
        # The bound status is looked up with the original name, not the
        # (possibly renamed) reporting name.
        the_row['Active bound'] = estimation_results.is_bound_active(
            parameter_name=estimation_results.beta_names[parameter_index]
        )
    return the_row
def get_pandas_estimated_parameters(
    estimation_results: EstimationResults,
    variance_covariance_type: EstimateVarianceCovariance | None = None,
    renumbering_parameters: dict[int, int] | None = None,
    renaming_parameters: dict[str, str] | None = None,
) -> pd.DataFrame:
    """Get the estimated parameters as a pandas data frame

    :param estimation_results: estimation results.
    :param variance_covariance_type: select which type of
        variance-covariance matrix is used to generate the statistics. If
        None, the default one reported by the estimation results is used.
        If the bootstrap one is selected but no bootstrap data is
        available, the robust one is used instead.
    :param renumbering_parameters: a dict that suggests new numbers for
        parameters, keyed by parameter index. Parameters that are not in
        the dict keep their index as number.
    :param renaming_parameters: a dict that suggests new names for some or
        all parameters, keyed by original name. Parameters that are not in
        the dict keep their name.
    :return: a Pandas data frame, with one row per parameter, sorted and
        indexed by parameter number.
    :raise BiogemeError: if the numbering assigns the same number to two
        different parameters.
    """
    if variance_covariance_type is None:
        variance_covariance_type = (
            estimation_results.get_default_variance_covariance_matrix()
        )
    if (
        variance_covariance_type == EstimateVarianceCovariance.BOOTSTRAP
        and estimation_results.bootstrap_time is None
    ):
        logger.warning(
            'No bootstrap data is available. The robust variance-covariance matrix is used instead.'
        )
        variance_covariance_type = EstimateVarianceCovariance.ROBUST

    if renumbering_parameters is not None:
        # Verify that the numbering is well defined
        number_values = list(renumbering_parameters.values())
        if len(number_values) != len(set(number_values)):
            error_msg = 'The new numbering cannot assign the same number to two different parameters.'
            raise BiogemeError(error_msg)

    if renaming_parameters is not None:
        # Verify that the renaming is well defined.
        name_values = list(renaming_parameters.values())
        if len(name_values) != len(set(name_values)):
            warning_msg = 'The new renaming assigns the same name for multiple parameters. It may not be the desired action.'
            logger.warning(warning_msg)

    renumbering = {} if renumbering_parameters is None else renumbering_parameters
    renaming = {} if renaming_parameters is None else renaming_parameters

    all_rows = {}
    for parameter_index, parameter_name in enumerate(estimation_results.beta_names):
        # A parameter absent from the dicts keeps its own index and name.
        # A bare ``.get(key)`` would yield None here: all such rows would
        # share the None key, and the sort below would fail.
        new_number = renumbering.get(parameter_index, parameter_index)
        new_name = renaming.get(parameter_name, parameter_name)
        if new_number in all_rows:
            # A partial renumbering may reuse the index of a parameter that
            # is not renumbered. Refuse rather than silently drop a row.
            error_msg = (
                f'The new numbering assigns the number {new_number} to more '
                f'than one parameter.'
            )
            raise BiogemeError(error_msg)
        all_rows[new_number] = get_pandas_one_parameter(
            estimation_results=estimation_results,
            parameter_index=parameter_index,
            variance_covariance_type=variance_covariance_type,
            parameter_number=new_number,
            parameter_name=new_name,
        )

    list_of_all_rows = [all_rows[a_row_number] for a_row_number in sorted(all_rows)]
    the_frame = pd.DataFrame(list_of_all_rows)
    the_frame.set_index('#', inplace=True)
    the_frame.index.name = None
    return the_frame
def get_pandas_one_pair_of_parameters(
    estimation_results: EstimationResults,
    first_parameter_index: int,
    second_parameter_index: int,
    variance_covariance_type: EstimateVarianceCovariance,
    first_parameter_name=None,
    second_parameter_name=None,
) -> dict[str, float | int | str]:
    """Build the row of the correlation table describing one pair of
    estimated parameters.

    :param estimation_results: estimation results.
    :param first_parameter_index: index of the first parameter
    :param second_parameter_index: index of the second parameter
    :param variance_covariance_type: type of variance-covariance estimate
        to be used. Its string representation prefixes the column names of
        the statistics.
    :param first_parameter_name: name reported for the first parameter. If
        None, taken from estimation results.
    :param second_parameter_name: name reported for the second parameter.
        If None, taken from estimation results.
    :return: one row of the table, as a dict mapping column names to values.
    :raise ValueError: if one of the two indices is out of range. The first
        index is checked before the second one.
    """
    number_of_parameters = len(estimation_results.beta_names)
    for an_index in (first_parameter_index, second_parameter_index):
        if an_index < 0 or an_index >= number_of_parameters:
            raise ValueError(
                f'Invalid parameter index {an_index}. Valid range: 0-'
                f' {number_of_parameters-1}'
            )

    # Default reporting names are the names stored in the estimation results.
    if first_parameter_name is None:
        first_parameter_name = estimation_results.beta_names[first_parameter_index]
    if second_parameter_name is None:
        second_parameter_name = estimation_results.beta_names[second_parameter_index]

    covar_header = str(variance_covariance_type)
    the_covariances = estimation_results.get_variance_covariance_matrix(
        variance_covariance_type=variance_covariance_type
    )
    the_correlations = calculates_correlation_matrix(covariance=the_covariances)

    pair_covariance = the_covariances[first_parameter_index, second_parameter_index]
    pair_correlation = the_correlations[first_parameter_index, second_parameter_index]
    pair_t_test = estimation_results.calculate_test(
        first_parameter_index, second_parameter_index, the_covariances
    )
    pair_p_value = calc_p_value(pair_t_test)

    return {
        'First parameter': first_parameter_name,
        'Second parameter': second_parameter_name,
        f'{covar_header} covariance': pair_covariance,
        f'{covar_header} correlation': pair_correlation,
        f'{covar_header} t-stat.': pair_t_test,
        f'{covar_header} p-value': pair_p_value,
    }
def get_pandas_correlation_results(
    estimation_results: EstimationResults,
    variance_covariance_type: EstimateVarianceCovariance | None = None,
    involved_parameters: dict[str, str] | None = None,
) -> pd.DataFrame:
    """Collect the pairwise correlation results in a Pandas data frame

    One row is generated for each pair of selected parameters such that the
    index of the first one is strictly larger than the index of the second
    one.

    :param estimation_results: estimation results.
    :param variance_covariance_type: type of variance-covariance estimate
        to be used. If None, the default one reported by the estimation
        results is used.
    :param involved_parameters: a dict that identifies the parameters to
        involve, and assigns them a name for the reporting. Keys are the
        original names, values are the reported names. If None, all
        parameters are involved, under their original names.
    :return: a Pandas data frame
    """
    if variance_covariance_type is None:
        variance_covariance_type = (
            estimation_results.get_default_variance_covariance_matrix()
        )

    # Map each selected parameter index to the name used in the report.
    if involved_parameters is None:
        reported_names = dict(enumerate(estimation_results.beta_names))
    else:
        reported_names = {
            estimation_results.get_parameter_index(original_name): reported_name
            for original_name, reported_name in involved_parameters.items()
        }

    selected = tuple(reported_names.items())
    the_rows = [
        get_pandas_one_pair_of_parameters(
            estimation_results=estimation_results,
            first_parameter_index=first_index,
            second_parameter_index=second_index,
            variance_covariance_type=variance_covariance_type,
            first_parameter_name=first_name,
            second_parameter_name=second_name,
        )
        for first_index, first_name in selected
        for second_index, second_name in selected
        if first_index > second_index
    ]
    return pd.DataFrame(the_rows)