# Source code for biogeme.results_processing.f12_output

"""
Generates an F12 output for ALOGIT

Michel Bierlaire
Thu Oct 3 10:09:52 2024
"""

import datetime
import logging
import os

from biogeme.version import get_version
from .estimation_results import (
    EstimateVarianceCovariance,
    EstimationResults,
    calculates_correlation_matrix,
)
from .pandas_output import get_pandas_estimated_parameters

logger = logging.getLogger(__name__)


def get_f12(
    estimation_results: EstimationResults,
    variance_covariance_type: EstimateVarianceCovariance | None = None,
) -> str:
    """Format estimation results in F12, the fixed-width format used by ALOGIT.

    The report is made of the following records, in this order:

    - line 1: model name (characters 1-79);
    - line 2: subtitle (characters 1-27) and time stamp (characters 57-77);
    - line 3: the historical ``END`` marker;
    - one line per estimated parameter: marker ``0`` (characters 1-4),
      name truncated to 10 characters (6-15), constrained flag ``T``/``F``
      (16-17), value (19-38) and standard error (39-58);
    - a ``-1`` marker closing the list of parameters;
    - a line of statistics: sample size (8 characters), log likelihood with
      constants (19, always 0 as it is not available), null log likelihood
      (20) and final log likelihood (20);
    - a line with the number of iterations (4 characters), an error code
      (4, always 0) and the time stamp of line 2 (21);
    - the lower triangle of the correlation matrix, multiplied by 100000,
      in fields of width 7, ten per line, ordered (2,1) (3,1) (3,2) (4,1)...

    :param estimation_results: estimation results.
    :param variance_covariance_type: type of variance-covariance estimate to
        be used. If None, the default one reported by ``estimation_results``
        is used.
    :return: results formatted in F12 format
    """
    if variance_covariance_type is None:
        variance_covariance_type = (
            estimation_results.get_default_variance_covariance_matrix()
        )
    covar_header = str(variance_covariance_type)
    # Name of the column of the parameters table holding the standard errors.
    std_err_column = f'{covar_header} std err.'

    # Fragments of the report, joined once at the end.
    parts: list[str] = []

    # Line 1, title, characters 1-79
    parts.append(f'{estimation_results.model_name[:79]: >79}\n')

    # Line 2, subtitle, characters 1-27, and time-date, characters 57-77
    subtitle = f'From biogeme {get_version()}'
    # Keep 'YYYY-MM-DD HH:MM:SS' only, dropping the microseconds.
    time_stamp = f'{datetime.datetime.now()}'[:19]
    parts.append(f'{subtitle[:27]: <56}{time_stamp: <21}\n')

    # Line 3, "END" (this is historical!)
    parts.append('END\n')

    # Lines 4 to K+3, one line per coefficient.
    table = get_pandas_estimated_parameters(
        estimation_results=estimation_results,
        variance_covariance_type=variance_covariance_type,
    )
    for parameter_index in table.index.to_list():
        values = table.loc[parameter_index]
        name = values['Name']
        # The flag is "T" only when the table reports an active bound.
        if 'Active bound' in values and values['Active bound'] == 1:
            constrained_flag = ' T'
        else:
            constrained_flag = ' F'
        parts.append(
            # characters 1-4: "0" right-aligned (historical), character 5: gap
            f'{0: >4} '
            # characters 6-15: first 10 characters of the label
            f'{name[:10]: >10}'
            # characters 16-17: constrained flag, character 18: gap
            f'{constrained_flag} '
            # characters 19-38: coefficient value, 20 chars
            f' {values["Value"]: >+19.12e}'
            # characters 39-58: standard error, 20 chars
            f' {values[std_err_column]: >+19.12e}'
            '\n'
        )

    # Line K+4, "-1" right-aligned in the 4-character marker field indicates
    # the end of the coefficients.
    parts.append(f'{-1: >4}\n')

    # Line K+5, statistics about the run
    # characters 1-8, number of observations
    parts.append(f'{estimation_results.sample_size: >8}')
    # characters 9-27: the log likelihood with constants is not available.
    # We put 0 instead.
    parts.append(f' {0: >18}')
    # characters 28-47, null log likelihood (0 when not available)
    null_log_likelihood = estimation_results.null_log_likelihood
    if null_log_likelihood is not None:
        parts.append(f' {null_log_likelihood: >+19.12e}')
    else:
        parts.append(f' {0: >19}')
    # characters 48-67, final log likelihood
    parts.append(f' {estimation_results.final_log_likelihood: >+19.12e}')
    parts.append('\n')

    # Line K+6, more statistics
    # characters 1-4, number of iterations (0 if not reported)
    messages = estimation_results.optimization_messages
    if "Number of iterations" in messages:
        parts.append(f'{messages["Number of iterations"]: >4}')
    else:
        parts.append(f'{0: >4}')
    # characters 5-8, error code (always 0)
    parts.append(f'{0: >4}')
    # characters 9-29, time and date, repeated from line 2
    parts.append(f'{time_stamp: >21}')
    parts.append('\n')

    # Lines K+7 and following: correlations * 100000, 10 per line, in fields
    # of width 7. Correlation (i, j), i > j, is in position (i-1)*(i-2)/2 + j,
    # i.e. (2,1) (3,1) (3,2) (4,1) etc.
    variance_covariance_matrix = estimation_results.get_variance_covariance_matrix(
        variance_covariance_type=variance_covariance_type
    )
    correlation_matrix = calculates_correlation_matrix(
        covariance=variance_covariance_matrix
    )
    count = 0
    for i, _ in enumerate(estimation_results.beta_names):
        for j in range(i):
            try:
                corr = int(100000 * correlation_matrix[i][j])
            except (OverflowError, ValueError):
                # int() raises OverflowError for an infinite value and
                # ValueError for NaN. Both are reported with the same
                # out-of-range marker instead of aborting the report.
                corr = 999999
            parts.append(f'{corr:7d}')
            count += 1
            if count % 10 == 0:
                parts.append('\n')
    parts.append('\n')
    return ''.join(parts)
def generate_f12_file(
    estimation_results: EstimationResults,
    filename: str,
    overwrite: bool = False,
    variance_covariance_type: EstimateVarianceCovariance | None = None,
) -> None:
    """Generate an F12 file with the estimation results

    :param estimation_results: estimation results
    :param filename: name of the file
    :param overwrite: if True and the file exists, it is overwritten
    :param variance_covariance_type: select which type of
        variance-covariance matrix is used to generate the statistics. If
        None, the default one reported by ``estimation_results`` is used. If
        the bootstrap one is selected but no bootstrap data is available, the
        robust one is used instead and a warning is logged.
    :raise FileExistsError: if the file exists and ``overwrite`` is False.
    """
    if variance_covariance_type is None:
        variance_covariance_type = (
            estimation_results.get_default_variance_covariance_matrix()
        )
    if (
        variance_covariance_type == EstimateVarianceCovariance.BOOTSTRAP
        and estimation_results.bootstrap_time is None
    ):
        logger.warning(
            'No bootstrap data is available. The robust variance-covariance matrix is used instead.'
        )
        variance_covariance_type = EstimateVarianceCovariance.ROBUST
    if not overwrite and os.path.exists(filename):
        raise FileExistsError(f"The file '{filename}' already exists.")

    # Build the whole report before touching the file: opening in 'w' mode
    # truncates it, so a failure while formatting must not destroy an
    # existing file nor leave an empty one behind.
    content = get_f12(
        estimation_results=estimation_results,
        variance_covariance_type=variance_covariance_type,
    )
    with open(filename, 'w') as file:
        # print appends a newline after the content.
        print(content, file=file)
    logger.info('File %s has been generated.', filename)