Source code for biogeme.results_processing.html_output

"""
Generates an HTML output

Michel Bierlaire
Wed Oct 2 16:10:54 2024
"""

import logging
import os
from datetime import datetime

import numpy as np

import biogeme.version as version
from biogeme.exceptions import BiogemeError
from .estimation_results import (
    EstimateVarianceCovariance,
    EstimationResults,
    calc_p_value,
    calculates_correlation_matrix,
)
from ..parameters import Parameters

logger = logging.getLogger(__name__)


[docs] def format_real_number(value: float) -> str: """Format a real number to be included in the HTML table""" formatted_value = f'{value:.3g}' return formatted_value
[docs] def get_html_header(estimation_results: EstimationResults) -> str: """Prepare the header for the HTML file, containing comments and the version of Biogeme. :return: string containing the header. """ html = '' html += '<html>\n' html += '<head>\n' html += '<script src="http://transp-or.epfl.ch/biogeme/sorttable.js">' '</script>\n' html += '<meta http-equiv="Content-Type" content="text/html; ' 'charset=utf-8" />\n' if estimation_results.raw_estimation_results is None: html += '<p>No estimation result is available.</p>' return html html += ( f'<title>{estimation_results.raw_estimation_results.model_name} - Report from ' f'biogeme {version.get_version()} ' f'[{version.versionDate}]</title>\n' ) html += ( '<meta name="keywords" content="biogeme, discrete choice, ' 'random utility">\n' ) html += ( f'<meta name="description" content="Report from ' f'biogeme {version.get_version()} ' f'[{version.versionDate}]">\n' ) html += '<meta name="author" content="{bv.author}">\n' html += '<style type=text/css>\n' html += '.biostyle\n' html += ' {font-size:10.0pt;\n' html += ' font-weight:400;\n' html += ' font-style:normal;\n' html += ' font-family:Courier;}\n' html += '.boundstyle\n' html += ' {font-size:10.0pt;\n' html += ' font-weight:400;\n' html += ' font-style:normal;\n' html += ' font-family:Courier;\n' html += ' color:red}\n' html += '</style>\n' html += '</head>\n' html += '<body bgcolor="#ffffff">\n' return html
[docs] def get_html_preamble(estimation_results: EstimationResults, file_name: str) -> str: """Generates the first part of the HTML, with the preamble information. :param estimation_results: estimation results :param file_name: name of the HTML file (used only for reporting) :return: HTML code """ now = datetime.now() html = version.get_html() html += f'<p>This file has automatically been generated on {now}</p>\n' if estimation_results.raw_estimation_results is None: html += '<p>No estimation result is available.</p>' return html html += '<table>\n' html += ( f'<tr class=biostyle><td align=right>' f'<strong>Report file</strong>: </td>' f'<td>{file_name}</td></tr>\n' ) html += ( f'<tr class=biostyle><td align=right>' f'<strong>Database name</strong>: </td>' f'<td>{estimation_results.raw_estimation_results.data_name}</td></tr>\n' ) html += '</table>\n' if not estimation_results.algorithm_has_converged: html += '<h2>Algorithm failed to converge</h2>\n' html += ( '<p>It seems that the optimization algorithm did not converge. ' 'Therefore, the results below do not correspond to the maximum ' 'likelihood estimator. Check the specification of the model, ' 'or the criteria for convergence of the algorithm. </p>' ) identification_threshold = Parameters().get_value('identification_threshold') try: if np.abs(estimation_results.smallest_eigenvalue) <= identification_threshold: html += '<h2>Warning: identification issue</h2>\n' html += ( f'<p>The second derivatives matrix is close to singularity. ' f'The smallest eigenvalue is ' f'{np.abs(estimation_results.smallest_eigenvalue):.3g}. This warning is ' f'triggered when it is smaller than the parameter ' f'<code>identification_threshold</code>=' f'{identification_threshold}.</p>' f'<p>Variables involved:' ) html += '<table>' for i, ev in enumerate(estimation_results.smallest_eigenvector): if np.abs(ev) > identification_threshold: html += ( f'<tr><td>{ev:.3g}</td>' f'<td> *</td>' f'<td> {estimation_results.beta_names[i]}</td></tr>\n' ) html += '</table>' html += '</p>\n' except BiogemeError: html += '<h2>Warning: second derivatives matrix not available.</h2>\n' html += '<p>The second derivatives matrix has not been calculated. The statistics requiring ' html += 'it have been generated using the BHHH matrix or the bootstrap samples (if available).</p>' if estimation_results.user_notes is not None: # User notes html += ( f'<blockquote style="border: 2px solid #666; ' f'padding: 10px; background-color:' f' #ccc;">{estimation_results.user_notes}</blockquote>' ) return html
[docs] def get_html_general_statistics(estimation_results: EstimationResults) -> str: """Get the general statistics coded in HTML :return: HTML code """ html = '<table border="0">\n' statistics = estimation_results.get_general_statistics() for description, value in statistics.items(): if value is not None: html += ( f'<tr class=biostyle><td align=right >' f'<strong>{description}</strong>: </td> ' f'<td>{value}</td></tr>\n' ) for key, value in estimation_results.optimization_messages.items(): if key in ( 'Relative projected gradient', 'Relative change', 'Relative gradient', ): html += ( f'<tr class=biostyle><td align=right >' f'<strong>{key}</strong>: </td> ' f'<td>{value:.7g}</td></tr>\n' ) else: html += ( f'<tr class=biostyle><td align=right >' f'<strong>{key}</strong>: </td> ' f'<td>{value}</td></tr>\n' ) html += '</table>\n' return html
[docs] def get_html_one_parameter( estimation_results: EstimationResults, parameter_index: int, variance_covariance_type: EstimateVarianceCovariance, parameter_number=None, parameter_name=None, ): """Generate the HTML code for one row of the table of the estimated parameters. :param estimation_results: estimation results. :param parameter_index: index of the parameter :param variance_covariance_type: type of variance-covariance estimate to be used. :param parameter_number: number of the parameter to report. If None, it is the index. :param parameter_name: name of the parameter to report. If None, taken from estimation results. :return: HTML code for the row """ if parameter_index < 0 or parameter_index >= len(estimation_results.beta_names): error_msg = f'Invalid parameter index {parameter_index}. Valid range: 0- {len(estimation_results.beta_names)-1}' raise ValueError(error_msg) if parameter_number is None: parameter_number = parameter_index if parameter_name is None: parameter_name = estimation_results.beta_names[parameter_index] output = '<tr class=biostyle>' output += f'<td>{parameter_number}</td>' output += f'<td>{parameter_name}</td>' # Value value = estimation_results.get_parameter_value_from_index( parameter_index=parameter_index ) output += f'<td>{format_real_number(value)}</td>' # std err std_err = estimation_results.get_parameter_std_err_from_index( parameter_index=parameter_index, estimate_var_covar=variance_covariance_type ) output += f'<td>{format_real_number(std_err)}</td>' # t-test against 0 t_test = estimation_results.get_parameter_t_test_from_index( parameter_index=parameter_index, estimate_var_covar=variance_covariance_type, target=0, ) output += f'<td>{format_real_number(t_test)}</td>' # p-value against 0 p_value = estimation_results.get_parameter_p_value_from_index( parameter_index=parameter_index, estimate_var_covar=variance_covariance_type, target=0, ) output += f'<td>{format_real_number(p_value)}</td>' if estimation_results.is_bound_active( parameter_name=estimation_results.beta_names[parameter_index] ): output += '<td>Active bound</td>' output += '</tr>' return output
[docs] def get_html_estimated_parameters( estimation_results: EstimationResults, variance_covariance_type: EstimateVarianceCovariance | None = None, renaming_parameters: dict[str, str] | None = None, sort_by_name: bool = False, ) -> str: """Get the estimated parameters coded in HTML :param estimation_results: estimation results. :param variance_covariance_type: type of variance-covariance estimate to be used. :param renaming_parameters: a dict that suggests new names for some or all parameters. :param sort_by_name: if True, parameters are sorted alphabetically by name. :return: HTML code """ if variance_covariance_type is None: variance_covariance_type = ( estimation_results.get_default_variance_covariance_matrix() ) if renaming_parameters is not None: # Verify that the renaming is well-defined. name_values = list(renaming_parameters.values()) if len(name_values) != len(set(name_values)): warning_msg = ( f'The new renaming assigns the same name for multiple parameters. It may not be the ' f'desired action: {renaming_parameters}' ) logger.warning(warning_msg) covar_header = str(variance_covariance_type) html = '<table border="1">\n' html += '<tr class=biostyle>' html += '<th>Id</th>' html += '<th>Name</th>' html += '<th>Value</th>' html += f'<th>{covar_header} std err.</th>' html += f'<th>{covar_header} t-stat.</th>' html += f'<th>{covar_header} p-value</th>' if estimation_results.is_any_bound_active(): html += '<th></th>' html += '</tr>\n' rows = [] for parameter_index, parameter_name in enumerate(estimation_results.beta_names): name = ( renaming_parameters.get(parameter_name) if renaming_parameters is not None else parameter_name ) row_html = ( get_html_one_parameter( estimation_results=estimation_results, parameter_index=parameter_index, variance_covariance_type=variance_covariance_type, parameter_number=parameter_index, parameter_name=name, ) + '\n' ) rows.append((parameter_index, name, row_html)) if sort_by_name: rows.sort(key=lambda x: x[1]) for _, _, row_html in rows: html += row_html html += '</table>' return html
[docs] def get_html_one_pair_of_parameters( estimation_results: EstimationResults, first_parameter_index: int, second_parameter_index: int, variance_covariance_type: EstimateVarianceCovariance, first_parameter_name=None, second_parameter_name=None, ) -> str: """Generate one row of the HTML table of the correlation data for estimated parameters. :param estimation_results: estimation results. :param first_parameter_index: index of the first parameter :param second_parameter_index: index of the second parameter :param variance_covariance_type: type of variance-covariance estimate to be used. :param first_parameter_name: name of the parameter to report. If None, taken from estimation results. :param second_parameter_name: name of the parameter to report. If None, taken from estimation results. :return: one row of the table """ if first_parameter_index < 0 or first_parameter_index >= len( estimation_results.beta_names ): error_msg = ( f'Invalid parameter index {first_parameter_index}. Valid range: 0-' f' {len(estimation_results.beta_names)-1}' ) raise ValueError(error_msg) if second_parameter_index < 0 or second_parameter_index >= len( estimation_results.beta_names ): error_msg = ( f'Invalid parameter index {second_parameter_index}. Valid range: 0-' f' {len(estimation_results.beta_names)-1}' ) raise ValueError(error_msg) if first_parameter_name is None: first_parameter_name = estimation_results.beta_names[first_parameter_index] if second_parameter_name is None: second_parameter_name = estimation_results.beta_names[second_parameter_index] covariance_matrix = estimation_results.get_variance_covariance_matrix( variance_covariance_type=variance_covariance_type ) correlation_matrix = calculates_correlation_matrix(covariance=covariance_matrix) covariance = float(covariance_matrix[first_parameter_index, second_parameter_index]) correlation = float( correlation_matrix[first_parameter_index, second_parameter_index] ) t_test = estimation_results.calculate_test( first_parameter_index, second_parameter_index, covariance_matrix ) p_value = calc_p_value(t_test) the_row = '<tr class=biostyle>' the_row += f'<td>{first_parameter_name}</td>' the_row += f'<td>{second_parameter_name}</td>' the_row += f'<td>{format_real_number(covariance)}</td>' the_row += f'<td>{format_real_number(correlation)}</td>' the_row += f'<td>{format_real_number(t_test)}</td>' the_row += f'<td>{format_real_number(p_value)}</td>' the_row += '</tr>' return the_row
[docs] def get_html_correlation_results( estimation_results: EstimationResults, variance_covariance_type: EstimateVarianceCovariance | None = None, involved_parameters: dict[str, str] | None = None, ) -> str: """Get the correlation results in an HTML format :param estimation_results: estimation results. :param variance_covariance_type: type of variance-covariance estimate to be used. :param involved_parameters: a dict that identifies the parameters to involve, as assign them with a name for the reporting. :return: HTML code """ if variance_covariance_type is None: variance_covariance_type = ( estimation_results.get_default_variance_covariance_matrix() ) covar_header = str(variance_covariance_type) if involved_parameters is None: list_of_parameters = { index: name for index, name in enumerate(estimation_results.beta_names) } else: list_of_parameters = { estimation_results.get_parameter_index(orig_name): new_name for orig_name, new_name in involved_parameters.items() } html = '<table border="1">\n' html += '<tr class=biostyle>' html += '<th>Coefficient 1</th>' html += '<th>Coefficient 2</th>' html += f'<th>{covar_header} covariance</th>' html += f'<th>{covar_header} correlation</th>' html += f'<th>{covar_header} t-test</th>' html += f'<th>{covar_header} p-value</th>' html += '</tr>' for first_parameter_index, first_parameter_name in list_of_parameters.items(): for second_parameter_index, second_parameter_name in list_of_parameters.items(): if first_parameter_index > second_parameter_index: the_row = get_html_one_pair_of_parameters( estimation_results=estimation_results, first_parameter_index=first_parameter_index, second_parameter_index=second_parameter_index, variance_covariance_type=variance_covariance_type, first_parameter_name=first_parameter_name, second_parameter_name=second_parameter_name, ) html += the_row + '\n' html += '</table>' return html
[docs] def get_html_condition_number(estimation_results: EstimationResults) -> str: """Report the smallest and largest eigenvalues, and the condition number. :param estimation_results: estimation results :return: HTML code """ html = ( f'<p>Smallest eigenvalue: ' f'{estimation_results.smallest_eigenvalue:.6g}</p>\n' ) html += ( f'<p>Largest eigenvalue: ' f'{estimation_results.largest_eigenvalue:.6g}</p>\n' ) html += f'<p>Condition number: ' f'{estimation_results.condition_number:.6g}</p>\n' return html
[docs] def generate_html_file( estimation_results: EstimationResults, filename: str, overwrite=False, variance_covariance_type: EstimateVarianceCovariance | None = None, ) -> None: """Generate an HTML file with the estimation results :param estimation_results: estimation results :param filename: name of the file :param overwrite: if True and the file exists, it is overwritten :param variance_covariance_type: select which type of variance-covariance matrix is used to generate the statistics. If None, the bootstrap one is used if available. If not available, the robust one. """ if variance_covariance_type is None: variance_covariance_type = ( estimation_results.get_default_variance_covariance_matrix() ) if ( variance_covariance_type == EstimateVarianceCovariance.BOOTSTRAP and estimation_results.bootstrap_time is None ): logger.warning( f'No bootstrap data is available. The robust variance-covariance matrix is used instead.' ) variance_covariance_type = EstimateVarianceCovariance.ROBUST if not overwrite and os.path.exists(filename): raise FileExistsError(f"The file '{filename}' already exists.") with open(filename, 'w') as file: header = get_html_header(estimation_results=estimation_results) preamble = get_html_preamble( estimation_results=estimation_results, file_name=filename ) general_statistics = get_html_general_statistics( estimation_results=estimation_results ) parameters = get_html_estimated_parameters( estimation_results=estimation_results, sort_by_name=True, variance_covariance_type=variance_covariance_type, ) correlation_results = get_html_correlation_results( estimation_results=estimation_results, variance_covariance_type=variance_covariance_type, ) footer = get_html_footer() print(header, file=file) print(preamble, file=file) print('<h1>Estimation report</h1>', file=file) print(general_statistics, file=file) print('<h1>Estimated parameters</h1>', file=file) print(parameters, file=file) print('<h2>Correlation of coefficients</h2>', file=file) print(correlation_results, file=file) try: condition_number = get_html_condition_number( estimation_results=estimation_results ) print(condition_number, file=file) except BiogemeError: ... print(footer, file=file) logger.info(f'File {filename} has been generated.')