# Source code for biogeme.results_processing.f12_output

"""
Generates an F12 output for ALOGIT

Michel Bierlaire
Thu Oct 3 10:09:52 2024
"""

import datetime
import logging
import os

from biogeme.version import get_version
from .estimation_results import (
    EstimateVarianceCovariance,
    EstimationResults,
    calculates_correlation_matrix,
)
from .pandas_output import get_pandas_estimated_parameters

logger = logging.getLogger(__name__)



[docs]
def get_f12(
    estimation_results: EstimationResults,
    variance_covariance_type: EstimateVarianceCovariance | None = None,
) -> str:
    """Format the estimation results in F12, the fixed-width format used by
    the software ALOGIT to report estimation results.

    The output consists of a title line, a subtitle/time stamp line, an
    ``END`` line, one line per estimated parameter, a ``-1`` terminator,
    two lines of run statistics, and the lower triangle of the correlation
    matrix (scaled by 100000, ten values per line).

    :param estimation_results: estimation results.
    :param variance_covariance_type: type of variance-covariance estimate to
        be used. If None, the value returned by
        ``estimation_results.get_default_variance_covariance_matrix()`` is used.
    :return: results formatted in F12 format
    """
    if variance_covariance_type is None:
        variance_covariance_type = (
            estimation_results.get_default_variance_covariance_matrix()
        )
    covar_header = str(variance_covariance_type)
    std_err_column = f'{covar_header} std err.'

    # The pieces of the report are collected and joined once at the end,
    # instead of repeatedly growing a string.
    chunks: list[str] = []

    # Line 1, title, characters 1-79
    chunks.append(f'{estimation_results.model_name[:79]: >79}\n')

    # Line 2, subtitle, characters 1-27, and time-date, characters 57-77
    t = f'From biogeme {get_version()}'
    d = f'{datetime.datetime.now()}'[:19]
    chunks.append(f'{t[:27]: <56}{d: <21}\n')

    # Line 3, "END" (this is historical!)
    chunks.append('END\n')

    # Line 4-(K+3), coefficient values
    #  characters 1-4, "   0" (again historical)
    #  characters 6-15, coefficient label, suggest using first 10
    #      characters of label in R
    #  characters 16-17, " F" (this indicates whether or not the
    #      coefficient is constrained)
    #  characters 19-38, coefficient value   20 chars
    #  characters 39-58, standard error      20 chars
    table = get_pandas_estimated_parameters(
        estimation_results=estimation_results,
        variance_covariance_type=variance_covariance_type,
    )
    for parameter_index in table.index.to_list():
        values = table.loc[parameter_index]
        name = values['Name']
        # " T" is written only when the table reports an active bound for
        # this parameter; a missing column is treated as "not constrained".
        bound_is_active = 'Active bound' in values and values['Active bound'] == 1
        flag = ' T' if bound_is_active else ' F'
        value = values['Value']
        std_err = values[std_err_column]
        chunks.append(
            f'   0 {name[:10]: >10}{flag} '
            f' {value: >+19.12e}'
            f' {std_err: >+19.12e}\n'
        )

    # Line K+4, "  -1" indicates end of coefficients
    chunks.append('  -1\n')

    # Line K+5, statistics about run
    #   characters 1-8, number of observations        8 chars
    #   characters 9-27, likelihood-with-constants   19 chars
    #   characters 28-47, null likelihood            20 chars
    #   characters 48-67, final likelihood           20 chars
    chunks.append(f'{estimation_results.sample_size: >8}')
    # The cte log likelihood is not available. We put 0 instead.
    chunks.append(f' {0: >18}')
    null_log_likelihood = estimation_results.null_log_likelihood
    if null_log_likelihood is not None:
        chunks.append(f' {null_log_likelihood: >+19.12e}')
    else:
        chunks.append(f' {0: >19}')
    chunks.append(f' {estimation_results.final_log_likelihood: >+19.12e}')
    chunks.append('\n')

    # Line K+6, more statistics
    #   characters 1-4, number of iterations (suggest use 0)        4 chars
    #   characters 5-8, error code (please use 0)                   4 chars
    #   characters 9-29, time and date (sugg. repeat from line 2)  21 chars
    messages = estimation_results.optimization_messages
    if 'Number of iterations' in messages:
        number_of_iterations = messages['Number of iterations']
    else:
        number_of_iterations = 0
    chunks.append(f'{number_of_iterations: >4}')
    chunks.append(f'{0: >4}')
    chunks.append(f'{d: >21}')
    chunks.append('\n')

    # Lines (K+7)-however many we need, correlations*100000
    #   10 per line, fields of width 7
    #   The order of these is that correlation i,j (i>j) is in position
    #   (i-1)*(i-2)/2+j, i.e.
    #   (2,1) (3,1) (3,2) (4,1) etc.
    variance_covariance_matrix = estimation_results.get_variance_covariance_matrix(
        variance_covariance_type=variance_covariance_type
    )
    correlation_matrix = calculates_correlation_matrix(
        covariance=variance_covariance_matrix
    )
    count = 0
    for i, _ in enumerate(estimation_results.beta_names):
        for j in range(i):
            try:
                corr = int(100000 * correlation_matrix[i][j])
            except (OverflowError, ValueError):
                # int() raises OverflowError for an infinite value and
                # ValueError for NaN. Both are reported with the same
                # sentinel so that an undefined correlation does not abort
                # the generation of the whole report.
                corr = 999999
            chunks.append(f'{corr:7d}')
            count += 1
            if count % 10 == 0:
                chunks.append('\n')
    chunks.append('\n')
    return ''.join(chunks)




[docs]
def generate_f12_file(
    estimation_results: EstimationResults,
    filename: str,
    overwrite: bool = False,
    variance_covariance_type: EstimateVarianceCovariance | None = None,
) -> None:
    """Generate a F12 file with the estimation results

    :param estimation_results: estimation results
    :param filename: name of the file
    :param overwrite: if True and the file exists, it is overwritten
    :param variance_covariance_type: select which type of variance-covariance matrix is used to generate the
        statistics. If None, the type returned by
        ``estimation_results.get_default_variance_covariance_matrix()`` is used. If the bootstrap type is
        selected but ``estimation_results.bootstrap_time`` is None, the robust one is used instead.
    :raises FileExistsError: if the file already exists and ``overwrite`` is False.
    """
    if variance_covariance_type is None:
        variance_covariance_type = (
            estimation_results.get_default_variance_covariance_matrix()
        )
    if (
        variance_covariance_type == EstimateVarianceCovariance.BOOTSTRAP
        and estimation_results.bootstrap_time is None
    ):
        logger.warning(
            'No bootstrap data is available. The robust variance-covariance matrix is used instead.'
        )
        variance_covariance_type = EstimateVarianceCovariance.ROBUST

    if not overwrite and os.path.exists(filename):
        raise FileExistsError(f"The file '{filename}' already exists.")

    # The content is built before the file is opened: opening in 'w' mode
    # truncates the file, so a failure while formatting the results would
    # otherwise leave an empty file behind (or wipe an existing one).
    content = get_f12(
        estimation_results=estimation_results,
        variance_covariance_type=variance_covariance_type,
    )
    with open(filename, 'w') as file:
        print(content, file=file)
    logger.info(f'File {filename} has been generated.')